Git Overview
Git utilities for CodeMap.
DiffChunk
dataclass
Represents a logical chunk of changes.
Source code in src/codemap/git/diff_splitter/schemas.py
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
__post_init__
__post_init__() -> None
Initialize default values.
Source code in src/codemap/git/diff_splitter/schemas.py
17 18 19 20 | |
__hash__
__hash__() -> int
Make DiffChunk hashable by using the object's id.
Returns:
| Type | Description |
|---|---|
int
|
Hash value based on the object's id |
Source code in src/codemap/git/diff_splitter/schemas.py
22 23 24 25 26 27 28 29 30 | |
__eq__
__eq__(other: object) -> bool
Compare DiffChunk objects for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
object
|
Another object to compare with |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the objects are the same instance, False otherwise |
Source code in src/codemap/git/diff_splitter/schemas.py
32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
DiffSplitter
Splits Git diffs into logical chunks.
Source code in src/codemap/git/diff_splitter/splitter.py
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
__init__
__init__(
repo_root: Path,
similarity_threshold: float = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["similarity_threshold"],
directory_similarity_threshold: float = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["directory_similarity_threshold"],
min_chunks_for_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["min_chunks_for_consolidation"],
max_chunks_before_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["max_chunks_before_consolidation"],
max_file_size_for_llm: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_file_size_for_llm"],
max_log_diff_size: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_log_diff_size"],
model_name: str = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["model_name"],
) -> None
Initialize the diff splitter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_root
|
Path
|
Root directory of the Git repository |
required |
similarity_threshold
|
float
|
Threshold for grouping by content similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['similarity_threshold']
|
directory_similarity_threshold
|
float
|
Threshold for directory similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['directory_similarity_threshold']
|
min_chunks_for_consolidation
|
int
|
Min chunks to trigger consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['min_chunks_for_consolidation']
|
max_chunks_before_consolidation
|
int
|
Max chunks allowed before forced consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_chunks_before_consolidation']
|
max_file_size_for_llm
|
int
|
Max file size (bytes) to process for LLM context.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_file_size_for_llm']
|
max_log_diff_size
|
int
|
Max diff size (bytes) to log in debug mode.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_log_diff_size']
|
model_name
|
str
|
Name of the sentence-transformer model to use.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['model_name']
|
Source code in src/codemap/git/diff_splitter/splitter.py
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | |
repo_root
instance-attribute
repo_root = repo_root
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
directory_similarity_threshold
instance-attribute
directory_similarity_threshold = (
directory_similarity_threshold
)
min_chunks_for_consolidation
instance-attribute
min_chunks_for_consolidation = min_chunks_for_consolidation
max_chunks_before_consolidation
instance-attribute
max_chunks_before_consolidation = (
max_chunks_before_consolidation
)
max_file_size_for_llm
instance-attribute
max_file_size_for_llm = max_file_size_for_llm
max_log_diff_size
instance-attribute
max_log_diff_size = max_log_diff_size
model_name
instance-attribute
model_name = model_name
are_sentence_transformers_available
classmethod
are_sentence_transformers_available() -> bool
Check if sentence transformers are available.
Returns:
| Type | Description |
|---|---|
bool
|
True if sentence transformers are available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
120 121 122 123 124 125 126 127 128 129 | |
is_model_available
classmethod
is_model_available() -> bool
Check if embedding model is available.
Returns:
| Type | Description |
|---|---|
bool
|
True if embedding model is available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
131 132 133 134 135 136 137 138 139 140 | |
set_model_available
classmethod
set_model_available(value: bool) -> None
Set model availability flag.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value
|
bool
|
Boolean indicating if model is available |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
142 143 144 145 146 147 148 149 150 151 | |
get_embedding_model
classmethod
get_embedding_model() -> EmbeddingModel | None
Get the embedding model.
Returns:
| Type | Description |
|---|---|
EmbeddingModel | None
|
The embedding model or None if not available |
Source code in src/codemap/git/diff_splitter/splitter.py
153 154 155 156 157 158 159 160 161 162 | |
set_embedding_model
classmethod
set_embedding_model(model: EmbeddingModel) -> None
Set the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
model
|
EmbeddingModel
|
The embedding model to set |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
164 165 166 167 168 169 170 171 172 173 | |
split_diff
Split a diff into logical chunks using semantic splitting.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
tuple[list[DiffChunk], list[str]]
|
Tuple of (List of DiffChunk objects based on semantic analysis, List of filtered large files) |
Raises:
| Type | Description |
|---|---|
ValueError
|
If semantic splitting is not available or fails |
Source code in src/codemap/git/diff_splitter/splitter.py
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | |
encode_chunks
encode_chunks(chunks: list[str]) -> dict[str, ndarray]
Encode a list of text chunks using the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[str]
|
List of text chunks to encode |
required |
Returns:
| Type | Description |
|---|---|
dict[str, ndarray]
|
Dictionary with embeddings array |
Source code in src/codemap/git/diff_splitter/splitter.py
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
GitDiff
dataclass
Represents a Git diff chunk.
Source code in src/codemap/git/utils.py
14 15 16 17 18 19 20 21 | |
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
is_staged
class-attribute
instance-attribute
is_staged: bool = False
is_untracked
class-attribute
instance-attribute
is_untracked: bool = False
__init__
__init__(
files: list[str],
content: str,
is_staged: bool = False,
is_untracked: bool = False,
) -> None
GitError
Bases: Exception
Custom exception for Git-related errors.
Source code in src/codemap/git/utils.py
24 25 | |
run_git_command
run_git_command(
command: list[str],
cwd: Path | str | None = None,
environment: dict[str, str] | None = None,
) -> str
Run a git command and return its output.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
command
|
list[str]
|
Command to run as a list of string arguments |
required |
cwd
|
Path | str | None
|
Working directory to run the command in |
None
|
environment
|
dict[str, str] | None
|
Environment variables to use |
None
|
Returns:
| Type | Description |
|---|---|
str
|
The output from the command |
Raises:
| Type | Description |
|---|---|
GitError
|
If the git command fails |
Source code in src/codemap/git/utils.py
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | |
interactive
Interactive commit interface for CodeMap.
logger
module-attribute
logger = getLogger(__name__)
MAX_PREVIEW_LENGTH
module-attribute
MAX_PREVIEW_LENGTH = 200
MAX_PREVIEW_LINES
module-attribute
MAX_PREVIEW_LINES = 10
ChunkAction
Bases: Enum
Possible actions for a diff chunk.
Source code in src/codemap/git/interactive.py
29 30 31 32 33 34 35 36 37 | |
COMMIT
class-attribute
instance-attribute
COMMIT = auto()
EDIT
class-attribute
instance-attribute
EDIT = auto()
SKIP
class-attribute
instance-attribute
SKIP = auto()
ABORT
class-attribute
instance-attribute
ABORT = auto()
REGENERATE
class-attribute
instance-attribute
REGENERATE = auto()
EXIT
class-attribute
instance-attribute
EXIT = auto()
ChunkResult
dataclass
Result of processing a diff chunk.
Source code in src/codemap/git/interactive.py
40 41 42 43 44 45 | |
message
class-attribute
instance-attribute
message: str | None = None
CommitUI
Interactive UI for the commit process.
Source code in src/codemap/git/interactive.py
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 | |
__init__
__init__() -> None
Initialize the commit UI.
Source code in src/codemap/git/interactive.py
51 52 53 | |
console
instance-attribute
console = Console()
display_chunk
display_chunk(
chunk: DiffChunk, index: int = 0, total: int = 1
) -> None
Display a diff chunk to the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
DiffChunk to display |
required |
index
|
int
|
The 0-based index of the current chunk |
0
|
total
|
int
|
The total number of chunks |
1
|
Source code in src/codemap/git/interactive.py
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | |
display_group
display_group(
group: SemanticGroup, index: int = 0, total: int = 1
) -> None
Display a semantic group to the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
group
|
SemanticGroup
|
SemanticGroup to display |
required |
index
|
int
|
The 0-based index of the current group |
0
|
total
|
int
|
The total number of groups |
1
|
Source code in src/codemap/git/interactive.py
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | |
display_message
display_message(
message: str, is_llm_generated: bool = False
) -> None
Display a commit message to the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to display |
required |
is_llm_generated
|
bool
|
Whether the message was generated by an LLM |
False
|
Source code in src/codemap/git/interactive.py
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 | |
get_user_action
get_user_action() -> ChunkAction
Get the user's desired action for the current chunk.
Returns:
| Type | Description |
|---|---|
ChunkAction
|
ChunkAction indicating what to do with the chunk |
Source code in src/codemap/git/interactive.py
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | |
get_user_action_on_lint_failure
get_user_action_on_lint_failure() -> ChunkAction
Get the user's desired action when linting fails.
Returns:
| Type | Description |
|---|---|
ChunkAction
|
ChunkAction indicating what to do. |
Source code in src/codemap/git/interactive.py
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 | |
edit_message
edit_message(current_message: str) -> str
Get an edited commit message from the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
current_message
|
str
|
Current commit message |
required |
Returns:
| Type | Description |
|---|---|
str
|
Edited commit message |
Source code in src/codemap/git/interactive.py
300 301 302 303 304 305 306 307 308 309 310 311 312 313 | |
process_chunk
process_chunk(
chunk: DiffChunk, index: int = 0, total: int = 1
) -> ChunkResult
Process a single diff chunk interactively.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
DiffChunk to process |
required |
index
|
int
|
The 0-based index of the current chunk |
0
|
total
|
int
|
The total number of chunks |
1
|
Returns:
| Type | Description |
|---|---|
ChunkResult
|
ChunkResult with the user's action and any modified message |
Source code in src/codemap/git/interactive.py
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 | |
confirm_abort
confirm_abort() -> bool
Ask the user to confirm aborting the commit process.
Returns:
| Type | Description |
|---|---|
bool
|
True if the user confirms, False otherwise |
Raises:
| Type | Description |
|---|---|
Exit
|
When the user confirms exiting |
Source code in src/codemap/git/interactive.py
343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 | |
confirm_bypass_hooks
confirm_bypass_hooks() -> ChunkAction
Ask the user what to do when git hooks fail.
Returns:
| Type | Description |
|---|---|
ChunkAction
|
ChunkAction indicating what to do next |
Source code in src/codemap/git/interactive.py
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 | |
show_success
show_success(message: str) -> None
Show a success message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Message to display |
required |
Source code in src/codemap/git/interactive.py
401 402 403 404 405 406 407 408 409 | |
show_warning
show_warning(message: str) -> None
Show a warning message to the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Warning message to display |
required |
Source code in src/codemap/git/interactive.py
411 412 413 414 415 416 417 418 419 | |
show_error
show_error(message: str) -> None
Show an error message to the user.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Error message to display |
required |
Source code in src/codemap/git/interactive.py
421 422 423 424 425 426 427 428 429 430 431 432 433 434 | |
show_skipped
show_skipped(files: list[str]) -> None
Show which files were skipped.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of skipped files |
required |
Source code in src/codemap/git/interactive.py
436 437 438 439 440 441 442 443 444 445 446 447 | |
show_message
show_message(message: str) -> None
Show a general informational message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Message to display |
required |
Source code in src/codemap/git/interactive.py
449 450 451 452 453 454 455 456 457 | |
show_regenerating
show_regenerating() -> None
Show a message indicating that a message is being regenerated.
Source code in src/codemap/git/interactive.py
459 460 461 | |
show_all_committed
show_all_committed() -> None
Show a message indicating that all changes are committed.
Source code in src/codemap/git/interactive.py
463 464 465 | |
show_all_done
show_all_done() -> None
Show a final success message when the process completes.
This is an alias for show_all_committed for now, but could be customized.
Source code in src/codemap/git/interactive.py
467 468 469 470 471 472 473 474 475 | |
show_lint_errors
show_lint_errors(errors: list[str]) -> None
Display linting errors to the user.
Source code in src/codemap/git/interactive.py
477 478 479 480 481 | |
confirm_commit_with_lint_errors
confirm_commit_with_lint_errors() -> bool
Ask the user if they want to commit despite lint errors.
Source code in src/codemap/git/interactive.py
483 484 485 | |
confirm_exit
confirm_exit() -> bool
Ask the user to confirm exiting without committing.
Source code in src/codemap/git/interactive.py
487 488 489 | |
display_failed_lint_message
display_failed_lint_message(
message: str,
lint_errors: list[str],
is_llm_generated: bool = False,
) -> None
Display a commit message that failed linting, along with the errors.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to display. |
required |
lint_errors
|
list[str]
|
List of linting error messages. |
required |
is_llm_generated
|
bool
|
Whether the message was generated by an LLM. |
False
|
Source code in src/codemap/git/interactive.py
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 | |
get_group_action
get_group_action() -> ChunkAction
Get the user's desired action for the current semantic group.
Returns:
| Type | Description |
|---|---|
ChunkAction
|
ChunkAction indicating what to do with the group |
Source code in src/codemap/git/interactive.py
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 | |
utils
Git utilities for CodeMap.
logger
module-attribute
logger = getLogger(__name__)
GitDiff
dataclass
Represents a Git diff chunk.
Source code in src/codemap/git/utils.py
14 15 16 17 18 19 20 21 | |
__init__
__init__(
files: list[str],
content: str,
is_staged: bool = False,
is_untracked: bool = False,
) -> None
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
is_staged
class-attribute
instance-attribute
is_staged: bool = False
is_untracked
class-attribute
instance-attribute
is_untracked: bool = False
GitError
Bases: Exception
Custom exception for Git-related errors.
Source code in src/codemap/git/utils.py
24 25 | |
run_git_command
run_git_command(
command: list[str],
cwd: Path | str | None = None,
environment: dict[str, str] | None = None,
) -> str
Run a git command and return its output.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
command
|
list[str]
|
Command to run as a list of string arguments |
required |
cwd
|
Path | str | None
|
Working directory to run the command in |
None
|
environment
|
dict[str, str] | None
|
Environment variables to use |
None
|
Returns:
| Type | Description |
|---|---|
str
|
The output from the command |
Raises:
| Type | Description |
|---|---|
GitError
|
If the git command fails |
Source code in src/codemap/git/utils.py
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | |
get_repo_root
get_repo_root(path: Path | None = None) -> Path
Get the root directory of the Git repository.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path
|
Path | None
|
Optional path to start searching from |
None
|
Returns:
| Type | Description |
|---|---|
Path
|
Path to repository root |
Raises:
| Type | Description |
|---|---|
GitError
|
If not in a Git repository |
Source code in src/codemap/git/utils.py
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | |
validate_repo_path
validate_repo_path(path: Path | None = None) -> Path | None
Validate and return the repository path.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path
|
Path | None
|
Optional path to validate (defaults to current directory) |
None
|
Returns:
| Type | Description |
|---|---|
Path | None
|
Path to the repository root if valid, None otherwise |
Source code in src/codemap/git/utils.py
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | |
get_staged_diff
get_staged_diff() -> GitDiff
Get the diff of staged changes.
Returns:
| Type | Description |
|---|---|
GitDiff
|
GitDiff object containing staged changes |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/utils.py
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | |
get_unstaged_diff
get_unstaged_diff() -> GitDiff
Get the diff of unstaged changes.
Returns:
| Type | Description |
|---|---|
GitDiff
|
GitDiff object containing unstaged changes |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/utils.py
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 | |
stage_files
stage_files(files: list[str]) -> None
Stage the specified files.
This function intelligently handles both existing and deleted files:
- For existing files, it uses git add
- For files that no longer exist but are tracked by git, it uses git rm
- For files that no longer exist but are still in the index, it uses git rm --cached
This prevents errors when trying to stage files that have been deleted but not yet tracked in git.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of files to stage |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If staging fails |
Source code in src/codemap/git/utils.py
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 | |
commit
commit(message: str) -> None
Create a commit with the given message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Commit message |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If commit fails |
Source code in src/codemap/git/utils.py
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 | |
get_other_staged_files
get_other_staged_files(
targeted_files: list[str],
) -> list[str]
Get staged files that are not part of the targeted files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
targeted_files
|
list[str]
|
List of files that are meant to be committed |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
List of other staged files that might be committed inadvertently |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/utils.py
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | |
stash_staged_changes
stash_staged_changes(exclude_files: list[str]) -> bool
Temporarily stash staged changes except for specified files.
This is used to ensure only specific files are committed when other files might be mistakenly staged.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
exclude_files
|
list[str]
|
Files to exclude from stashing (to keep staged) |
required |
Returns:
| Type | Description |
|---|---|
bool
|
Whether stashing was performed |
Raises:
| Type | Description |
|---|---|
GitError
|
If git operations fail |
Source code in src/codemap/git/utils.py
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 | |
unstash_changes
unstash_changes() -> None
Restore previously stashed changes.
Raises:
| Type | Description |
|---|---|
GitError
|
If git operations fail |
Source code in src/codemap/git/utils.py
421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 | |
commit_only_files
commit_only_files(
files: list[str],
message: str,
*,
commit_options: list[str] | None = None,
ignore_hooks: bool = False,
) -> list[str]
Commit only the specified files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of files to commit |
required |
message
|
str
|
Commit message |
required |
commit_options
|
list[str] | None
|
Additional commit options |
None
|
ignore_hooks
|
bool
|
Whether to ignore Git hooks |
False
|
Returns:
| Type | Description |
|---|---|
list[str]
|
List of other staged files that weren't committed |
Source code in src/codemap/git/utils.py
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 | |
get_untracked_files
get_untracked_files() -> list[str]
Get a list of untracked files in the repository.
These are files that are not yet tracked by Git (new files that haven't been staged).
Returns:
| Type | Description |
|---|---|
list[str]
|
List of untracked file paths |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/utils.py
529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 | |
unstage_files
unstage_files(files: list[str]) -> None
Unstage the specified files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of files to unstage |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If unstaging fails |
Source code in src/codemap/git/utils.py
550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 | |
switch_branch
switch_branch(branch_name: str) -> None
Switch the current Git branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
The name of the branch to switch to. |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If the git checkout command fails. |
Source code in src/codemap/git/utils.py
568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 | |
get_current_branch
get_current_branch() -> str
Get the name of the current branch.
Returns:
| Type | Description |
|---|---|
str
|
Name of the current branch |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/utils.py
595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 | |
is_git_ignored
is_git_ignored(file_path: str) -> bool
Check if a file is ignored by Git.
Source code in src/codemap/git/utils.py
613 614 615 616 617 618 | |
commit_linter
Commit linter package for validating git commit messages according to conventional commits.
This package provides modules for parsing, validating, and configuring commit message linting.
CommitLintConfig
dataclass
Configuration for commit message linting rules.
Rather than providing default values here, this class now loads its configuration from the central config.py file via ConfigLoader.
Source code in src/codemap/git/commit_linter/config.py
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 | |
header_max_length
class-attribute
instance-attribute
header_max_length: Rule = field(
default_factory=lambda: Rule(
name="header-max-length",
condition="header has value or less characters",
rule="always",
value=100,
level=ERROR,
)
)
header_min_length
class-attribute
instance-attribute
header_min_length: Rule = field(
default_factory=lambda: Rule(
name="header-min-length",
condition="header has value or more characters",
rule="always",
value=0,
)
)
header_case
class-attribute
instance-attribute
header_case: Rule = field(
default_factory=lambda: Rule(
name="header-case",
condition="header is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)
)
header_full_stop
class-attribute
instance-attribute
header_full_stop: Rule = field(
default_factory=lambda: Rule(
name="header-full-stop",
condition="header ends with value",
rule="never",
value=".",
)
)
header_trim
class-attribute
instance-attribute
header_trim: Rule = field(
default_factory=lambda: Rule(
name="header-trim",
condition="header must not have initial and/or trailing whitespaces",
rule="always",
)
)
type_enum
class-attribute
instance-attribute
type_enum: Rule = field(
default_factory=lambda: Rule(
name="type-enum",
condition="type is found in value",
rule="always",
value=[],
)
)
type_case
class-attribute
instance-attribute
type_case: Rule = field(
default_factory=lambda: Rule(
name="type-case",
condition="type is in case value",
rule="always",
value="lower-case",
)
)
type_empty
class-attribute
instance-attribute
type_empty: Rule = field(
default_factory=lambda: Rule(
name="type-empty",
condition="type is empty",
rule="never",
)
)
scope_enum
class-attribute
instance-attribute
scope_enum: Rule = field(
default_factory=lambda: Rule(
name="scope-enum",
condition="scope is found in value",
rule="always",
value=[],
level=DISABLED,
)
)
scope_case
class-attribute
instance-attribute
scope_case: Rule = field(
default_factory=lambda: Rule(
name="scope-case",
condition="scope is in case value",
rule="always",
value="lower-case",
)
)
scope_empty
class-attribute
instance-attribute
scope_empty: Rule = field(
default_factory=lambda: Rule(
name="scope-empty",
condition="scope is empty",
rule="never",
level=DISABLED,
)
)
subject_case
class-attribute
instance-attribute
subject_case: Rule = field(
default_factory=lambda: Rule(
name="subject-case",
condition="subject is in case value",
rule="always",
value=[
"sentence-case",
"start-case",
"pascal-case",
"upper-case",
],
)
)
subject_empty
class-attribute
instance-attribute
subject_empty: Rule = field(
default_factory=lambda: Rule(
name="subject-empty",
condition="subject is empty",
rule="never",
)
)
subject_full_stop
class-attribute
instance-attribute
subject_full_stop: Rule = field(
default_factory=lambda: Rule(
name="subject-full-stop",
condition="subject ends with value",
rule="never",
value=".",
)
)
subject_exclamation_mark
class-attribute
instance-attribute
subject_exclamation_mark: Rule = field(
default_factory=lambda: Rule(
name="subject-exclamation-mark",
condition="subject has exclamation before the : marker",
rule="never",
level=DISABLED,
)
)
body_leading_blank
class-attribute
instance-attribute
body_leading_blank: Rule = field(
default_factory=lambda: Rule(
name="body-leading-blank",
condition="body begins with blank line",
rule="always",
level=WARNING,
)
)
body_empty
class-attribute
instance-attribute
body_empty: Rule = field(
default_factory=lambda: Rule(
name="body-empty",
condition="body is empty",
rule="never",
level=DISABLED,
)
)
body_max_line_length
class-attribute
instance-attribute
body_max_line_length: Rule = field(
default_factory=lambda: Rule(
name="body-max-line-length",
condition="body lines has value or less characters",
rule="always",
value=100,
)
)
footer_leading_blank
class-attribute
instance-attribute
footer_leading_blank: Rule = field(
default_factory=lambda: Rule(
name="footer-leading-blank",
condition="footer begins with blank line",
rule="always",
level=WARNING,
)
)
footer_empty
class-attribute
instance-attribute
footer_empty: Rule = field(
default_factory=lambda: Rule(
name="footer-empty",
condition="footer is empty",
rule="never",
level=DISABLED,
)
)
footer_max_line_length
class-attribute
instance-attribute
footer_max_line_length: Rule = field(
default_factory=lambda: Rule(
name="footer-max-line-length",
condition="footer lines has value or less characters",
rule="always",
value=100,
)
)
type_max_length
class-attribute
instance-attribute
type_max_length: Rule = field(
default_factory=lambda: Rule(
name="type-max-length",
condition="type has value or less characters",
rule="always",
value=float("inf"),
)
)
type_min_length
class-attribute
instance-attribute
type_min_length: Rule = field(
default_factory=lambda: Rule(
name="type-min-length",
condition="type has value or more characters",
rule="always",
value=0,
)
)
scope_max_length
class-attribute
instance-attribute
scope_max_length: Rule = field(
default_factory=lambda: Rule(
name="scope-max-length",
condition="scope has value or less characters",
rule="always",
value=float("inf"),
)
)
scope_min_length
class-attribute
instance-attribute
scope_min_length: Rule = field(
default_factory=lambda: Rule(
name="scope-min-length",
condition="scope has value or more characters",
rule="always",
value=0,
)
)
subject_max_length
class-attribute
instance-attribute
subject_max_length: Rule = field(
default_factory=lambda: Rule(
name="subject-max-length",
condition="subject has value or less characters",
rule="always",
value=float("inf"),
)
)
subject_min_length
class-attribute
instance-attribute
subject_min_length: Rule = field(
default_factory=lambda: Rule(
name="subject-min-length",
condition="subject has value or more characters",
rule="always",
value=0,
)
)
body_max_length
class-attribute
instance-attribute
body_max_length: Rule = field(
default_factory=lambda: Rule(
name="body-max-length",
condition="body has value or less characters",
rule="always",
value=float("inf"),
)
)
body_min_length
class-attribute
instance-attribute
body_min_length: Rule = field(
default_factory=lambda: Rule(
name="body-min-length",
condition="body has value or more characters",
rule="always",
value=0,
)
)
body_case
class-attribute
instance-attribute
body_case: Rule = field(
default_factory=lambda: Rule(
name="body-case",
condition="body is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)
)
body_full_stop
class-attribute
instance-attribute
body_full_stop: Rule = field(
default_factory=lambda: Rule(
name="body-full-stop",
condition="body ends with value",
rule="never",
value=".",
level=DISABLED,
)
)
references_empty
class-attribute
instance-attribute
references_empty: Rule = field(
default_factory=lambda: Rule(
name="references-empty",
condition="references has at least one entry",
rule="never",
level=DISABLED,
)
)
signed_off_by
class-attribute
instance-attribute
signed_off_by: Rule = field(
default_factory=lambda: Rule(
name="signed-off-by",
condition="message has value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)
)
trailer_exists
class-attribute
instance-attribute
trailer_exists: Rule = field(
default_factory=lambda: Rule(
name="trailer-exists",
condition="message has trailer value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)
)
footer_max_length
class-attribute
instance-attribute
footer_max_length: Rule = field(
default_factory=lambda: Rule(
name="footer-max-length",
condition="footer has value or less characters",
rule="always",
value=float("inf"),
)
)
footer_min_length
class-attribute
instance-attribute
footer_min_length: Rule = field(
default_factory=lambda: Rule(
name="footer-min-length",
condition="footer has value or more characters",
rule="always",
value=0,
)
)
from_dict
classmethod
from_dict(
config_dict: dict[str, Any],
config_loader: ConfigLoader | None = None,
) -> CommitLintConfig
Create a CommitLintConfig from a dictionary.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
config_dict
|
dict[str, Any]
|
Configuration dictionary to parse |
required |
config_loader
|
ConfigLoader | None
|
Optional ConfigLoader instance for retrieving additional configuration |
None
|
Returns:
| Name | Type | Description |
|---|---|---|
CommitLintConfig |
CommitLintConfig
|
Configured instance |
Source code in src/codemap/git/commit_linter/config.py
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 | |
get_all_rules
get_all_rules() -> list[Rule]
Get all rules as a list.
Source code in src/codemap/git/commit_linter/config.py
445 446 447 448 449 450 451 | |
__init__
__init__(
header_max_length: Rule = lambda: Rule(
name="header-max-length",
condition="header has value or less characters",
rule="always",
value=100,
level=ERROR,
)(),
header_min_length: Rule = lambda: Rule(
name="header-min-length",
condition="header has value or more characters",
rule="always",
value=0,
)(),
header_case: Rule = lambda: Rule(
name="header-case",
condition="header is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)(),
header_full_stop: Rule = lambda: Rule(
name="header-full-stop",
condition="header ends with value",
rule="never",
value=".",
)(),
header_trim: Rule = lambda: Rule(
name="header-trim",
condition="header must not have initial and/or trailing whitespaces",
rule="always",
)(),
type_enum: Rule = lambda: Rule(
name="type-enum",
condition="type is found in value",
rule="always",
value=[],
)(),
type_case: Rule = lambda: Rule(
name="type-case",
condition="type is in case value",
rule="always",
value="lower-case",
)(),
type_empty: Rule = lambda: Rule(
name="type-empty",
condition="type is empty",
rule="never",
)(),
scope_enum: Rule = lambda: Rule(
name="scope-enum",
condition="scope is found in value",
rule="always",
value=[],
level=DISABLED,
)(),
scope_case: Rule = lambda: Rule(
name="scope-case",
condition="scope is in case value",
rule="always",
value="lower-case",
)(),
scope_empty: Rule = lambda: Rule(
name="scope-empty",
condition="scope is empty",
rule="never",
level=DISABLED,
)(),
subject_case: Rule = lambda: Rule(
name="subject-case",
condition="subject is in case value",
rule="always",
value=[
"sentence-case",
"start-case",
"pascal-case",
"upper-case",
],
)(),
subject_empty: Rule = lambda: Rule(
name="subject-empty",
condition="subject is empty",
rule="never",
)(),
subject_full_stop: Rule = lambda: Rule(
name="subject-full-stop",
condition="subject ends with value",
rule="never",
value=".",
)(),
subject_exclamation_mark: Rule = lambda: Rule(
name="subject-exclamation-mark",
condition="subject has exclamation before the : marker",
rule="never",
level=DISABLED,
)(),
body_leading_blank: Rule = lambda: Rule(
name="body-leading-blank",
condition="body begins with blank line",
rule="always",
level=WARNING,
)(),
body_empty: Rule = lambda: Rule(
name="body-empty",
condition="body is empty",
rule="never",
level=DISABLED,
)(),
body_max_line_length: Rule = lambda: Rule(
name="body-max-line-length",
condition="body lines has value or less characters",
rule="always",
value=100,
)(),
footer_leading_blank: Rule = lambda: Rule(
name="footer-leading-blank",
condition="footer begins with blank line",
rule="always",
level=WARNING,
)(),
footer_empty: Rule = lambda: Rule(
name="footer-empty",
condition="footer is empty",
rule="never",
level=DISABLED,
)(),
footer_max_line_length: Rule = lambda: Rule(
name="footer-max-line-length",
condition="footer lines has value or less characters",
rule="always",
value=100,
)(),
type_max_length: Rule = lambda: Rule(
name="type-max-length",
condition="type has value or less characters",
rule="always",
value=float("inf"),
)(),
type_min_length: Rule = lambda: Rule(
name="type-min-length",
condition="type has value or more characters",
rule="always",
value=0,
)(),
scope_max_length: Rule = lambda: Rule(
name="scope-max-length",
condition="scope has value or less characters",
rule="always",
value=float("inf"),
)(),
scope_min_length: Rule = lambda: Rule(
name="scope-min-length",
condition="scope has value or more characters",
rule="always",
value=0,
)(),
subject_max_length: Rule = lambda: Rule(
name="subject-max-length",
condition="subject has value or less characters",
rule="always",
value=float("inf"),
)(),
subject_min_length: Rule = lambda: Rule(
name="subject-min-length",
condition="subject has value or more characters",
rule="always",
value=0,
)(),
body_max_length: Rule = lambda: Rule(
name="body-max-length",
condition="body has value or less characters",
rule="always",
value=float("inf"),
)(),
body_min_length: Rule = lambda: Rule(
name="body-min-length",
condition="body has value or more characters",
rule="always",
value=0,
)(),
body_case: Rule = lambda: Rule(
name="body-case",
condition="body is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)(),
body_full_stop: Rule = lambda: Rule(
name="body-full-stop",
condition="body ends with value",
rule="never",
value=".",
level=DISABLED,
)(),
references_empty: Rule = lambda: Rule(
name="references-empty",
condition="references has at least one entry",
rule="never",
level=DISABLED,
)(),
signed_off_by: Rule = lambda: Rule(
name="signed-off-by",
condition="message has value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)(),
trailer_exists: Rule = lambda: Rule(
name="trailer-exists",
condition="message has trailer value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)(),
footer_max_length: Rule = lambda: Rule(
name="footer-max-length",
condition="footer has value or less characters",
rule="always",
value=float("inf"),
)(),
footer_min_length: Rule = lambda: Rule(
name="footer-min-length",
condition="footer has value or more characters",
rule="always",
value=0,
)(),
) -> None
Rule
dataclass
A rule configuration for commit linting.
Source code in src/codemap/git/commit_linter/config.py
26 27 28 29 30 31 32 33 34 | |
name
instance-attribute
name: str
condition
instance-attribute
condition: str
rule
class-attribute
instance-attribute
rule: Literal['always', 'never'] = 'always'
value
class-attribute
instance-attribute
value: Any = None
RuleLevel
Bases: Enum
Enforcement level for a linting rule.
Source code in src/codemap/git/commit_linter/config.py
18 19 20 21 22 23 | |
DISABLED
class-attribute
instance-attribute
DISABLED = 0
WARNING
class-attribute
instance-attribute
WARNING = 1
ERROR
class-attribute
instance-attribute
ERROR = 2
CommitLinter
Lints commit messages based on the Conventional Commits specification v1.0.0.
Source code in src/codemap/git/commit_linter/linter.py
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 | |
__init__
__init__(
allowed_types: list[str] | None = None,
config: CommitLintConfig | None = None,
config_path: str | None = None,
config_loader: ConfigLoader | None = None,
) -> None
Initialize the linter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
allowed_types
|
List[str]
|
Override list of allowed commit types. |
None
|
config
|
CommitLintConfig
|
Configuration object for the linter. |
None
|
config_path
|
str
|
Path to a configuration file (.codemap.yml). |
None
|
config_loader
|
ConfigLoader
|
Config loader instance to use (dependency injection). |
None
|
Source code in src/codemap/git/commit_linter/linter.py
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | |
config_loader
instance-attribute
config_loader = config_loader or ConfigLoader(
config_file=config_path, repo_root=repo_root
)
allowed_types
instance-attribute
allowed_types = {
t.lower() for t in allowed_types or default_types
}
config
instance-attribute
config = config
lint
lint(message: str) -> tuple[bool, list[str]]
Lints the commit message against Conventional Commits v1.0.0.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to lint |
required |
Returns:
| Type | Description |
|---|---|
tuple[bool, list[str]]
|
A tuple of the form (is_valid, list_of_messages) |
Source code in src/codemap/git/commit_linter/linter.py
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | |
is_valid
is_valid(message: str) -> bool
Checks if the commit message is valid (no errors).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to validate |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if message is valid, False otherwise |
Source code in src/codemap/git/commit_linter/linter.py
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | |
create_linter
create_linter(
allowed_types: list[str] | None = None,
config: CommitLintConfig | None = None,
config_path: str | None = None,
config_loader: ConfigLoader | None = None,
repo_root: Path | None = None,
) -> CommitLinter
Create a CommitLinter with proper dependency injection for configuration.
This factory function follows the Chain of Responsibility pattern for configuration management, ensuring the linter uses the same ConfigLoader instance as the rest of the application.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
allowed_types
|
list[str] | None
|
Override list of allowed commit types |
None
|
config
|
CommitLintConfig | None
|
Pre-configured CommitLintConfig object |
None
|
config_path
|
str | None
|
Path to a configuration file |
None
|
config_loader
|
ConfigLoader | None
|
ConfigLoader instance for configuration (recommended) |
None
|
repo_root
|
Path | None
|
Repository root path |
None
|
Returns:
| Name | Type | Description |
|---|---|---|
CommitLinter |
CommitLinter
|
Configured commit linter instance |
Source code in src/codemap/git/commit_linter/__init__.py
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | |
linter
Main linter module for commit messages.
BODY_MAX_LINE_LENGTH
module-attribute
BODY_MAX_LINE_LENGTH = DEFAULT_CONFIG["commit"]["lint"][
"body_max_line_length"
]["value"]
CommitLinter
Lints commit messages based on the Conventional Commits specification v1.0.0.
Source code in src/codemap/git/commit_linter/linter.py
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 
539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 | |
__init__
__init__(
allowed_types: list[str] | None = None,
config: CommitLintConfig | None = None,
config_path: str | None = None,
config_loader: ConfigLoader | None = None,
) -> None
Initialize the linter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
allowed_types
|
List[str]
|
Override list of allowed commit types. |
None
|
config
|
CommitLintConfig
|
Configuration object for the linter. |
None
|
config_path
|
str
|
Path to a configuration file (.codemap.yml). |
None
|
config_loader
|
ConfigLoader
|
Config loader instance to use (dependency injection). |
None
|
Source code in src/codemap/git/commit_linter/linter.py
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | |
config_loader
instance-attribute
config_loader = config_loader or ConfigLoader(
config_file=config_path, repo_root=repo_root
)
allowed_types
instance-attribute
allowed_types = {
t.lower() for t in allowed_types or default_types
}
config
instance-attribute
config = config
lint
lint(message: str) -> tuple[bool, list[str]]
Lints the commit message against Conventional Commits v1.0.0.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to lint |
required |
Returns:
| Type | Description |
|---|---|
tuple[bool, list[str]]
|
A tuple of the form (is_valid, list_of_messages) |
Source code in src/codemap/git/commit_linter/linter.py
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | |
is_valid
is_valid(message: str) -> bool
Checks if the commit message is valid (no errors).
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to validate |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if message is valid, False otherwise |
Source code in src/codemap/git/commit_linter/linter.py
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | |
parser
Parsing utilities for commit messages.
MatchLike
Bases: Protocol
Protocol for objects that behave like re.Match.
Source code in src/codemap/git/commit_linter/parser.py
16 17 18 19 20 21 22 23 24 25 | |
groupdict
groupdict() -> dict[str, Any]
Return the dictionary mapping group names to the matched values.
Source code in src/codemap/git/commit_linter/parser.py
19 20 21 | |
group
group(group_id: int | str = 0) -> str | None
Return the match group by number or name.
Source code in src/codemap/git/commit_linter/parser.py
23 24 25 | |
CommitParser
Parser for conventional commit messages.
This parser handles parsing and validation of commit messages following the Conventional Commits specification. It supports extracting commit type, scope, description, body, and footers.
Source code in src/codemap/git/commit_linter/parser.py
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 | |
__init__
__init__() -> None
Initialize the commit parser.
Source code in src/codemap/git/commit_linter/parser.py
35 36 37 38 39 | |
parse_commit
parse_commit(message: str) -> MatchLike | None
Parse a commit message using the main regex pattern.
This method parses the commit message according to the Conventional Commits specification, extracting the header, body, and footers. It handles cases where footers might not be immediately detected by the main regex pattern.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The raw commit message string to parse. |
required |
Returns:
| Type | Description |
|---|---|
MatchLike | None
|
A MatchLike object containing the parsed commit components (type, scope, description, |
MatchLike | None
|
body, footers) if successful, or None if the message doesn't match the expected format. |
MatchLike | None
|
The returned object provides access to match groups via group() and groupdict() methods, |
MatchLike | None
|
with the addition of a 'footers' group that may be detected beyond the main regex match. |
Source code in src/codemap/git/commit_linter/parser.py
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | |
parse_footers
parse_footers(
footers_str: str | None,
) -> list[dict[str, Any]]
Parses commit footers from a string, handling multi-line values.
Parses footer lines according to Conventional Commits specification, where each footer consists of a token, separator, and value. Handles both strict uppercase tokens and potential invalid footers for error reporting. Preserves multi-line values and blank lines within footer values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
footers_str
|
str | None
|
The string containing footer lines to parse. May be None if no footers exist. |
required |
Returns:
| Type | Description |
|---|---|
list[dict[str, Any]]
|
A list of dictionaries, where each dictionary represents a parsed footer with keys: |
list[dict[str, Any]]
|
|
list[dict[str, Any]]
|
|
list[dict[str, Any]]
|
|
Note
For invalid footers (those not matching strict regex but looking like footers), the dictionary will still be created but marked as invalid during validation.
Source code in src/codemap/git/commit_linter/parser.py
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | |
split_body_footers
split_body_footers(
body_and_footers_str: str | None,
) -> tuple[str | None, str | None]
Splits the text after the header into body and footers.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
body_and_footers_str
|
str | None
|
The string containing both body and footers text, or None. |
required |
Returns:
| Type | Description |
|---|---|
tuple[str | None, str | None]
|
A tuple (body, footers): the first element is the body text as a string, or None if empty/not present; the second element is the footers text as a string, or None if empty/not present. |
Source code in src/codemap/git/commit_linter/parser.py
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 | |
config
Configuration for commit message linting.
This module defines the configuration structures and rules for linting commit messages according to Conventional Commits specifications.
RuleLevel
Bases: Enum
Enforcement level for a linting rule.
Source code in src/codemap/git/commit_linter/config.py
18 19 20 21 22 23 | |
DISABLED
class-attribute
instance-attribute
DISABLED = 0
WARNING
class-attribute
instance-attribute
WARNING = 1
ERROR
class-attribute
instance-attribute
ERROR = 2
Rule
dataclass
A rule configuration for commit linting.
Source code in src/codemap/git/commit_linter/config.py
26 27 28 29 30 31 32 33 34 | |
__init__
__init__(
name: str,
condition: str,
rule: Literal["always", "never"] = "always",
level: RuleLevel = ERROR,
value: Any = None,
) -> None
name
instance-attribute
name: str
condition
instance-attribute
condition: str
rule
class-attribute
instance-attribute
rule: Literal['always', 'never'] = 'always'
value
class-attribute
instance-attribute
value: Any = None
CommitLintConfig
dataclass
Configuration for commit message linting rules.
Rather than providing default values here, this class now loads its configuration from the central config.py file via ConfigLoader.
Source code in src/codemap/git/commit_linter/config.py
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 | |
__init__
__init__(
header_max_length: Rule = lambda: Rule(
name="header-max-length",
condition="header has value or less characters",
rule="always",
value=100,
level=ERROR,
)(),
header_min_length: Rule = lambda: Rule(
name="header-min-length",
condition="header has value or more characters",
rule="always",
value=0,
)(),
header_case: Rule = lambda: Rule(
name="header-case",
condition="header is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)(),
header_full_stop: Rule = lambda: Rule(
name="header-full-stop",
condition="header ends with value",
rule="never",
value=".",
)(),
header_trim: Rule = lambda: Rule(
name="header-trim",
condition="header must not have initial and/or trailing whitespaces",
rule="always",
)(),
type_enum: Rule = lambda: Rule(
name="type-enum",
condition="type is found in value",
rule="always",
value=[],
)(),
type_case: Rule = lambda: Rule(
name="type-case",
condition="type is in case value",
rule="always",
value="lower-case",
)(),
type_empty: Rule = lambda: Rule(
name="type-empty",
condition="type is empty",
rule="never",
)(),
scope_enum: Rule = lambda: Rule(
name="scope-enum",
condition="scope is found in value",
rule="always",
value=[],
level=DISABLED,
)(),
scope_case: Rule = lambda: Rule(
name="scope-case",
condition="scope is in case value",
rule="always",
value="lower-case",
)(),
scope_empty: Rule = lambda: Rule(
name="scope-empty",
condition="scope is empty",
rule="never",
level=DISABLED,
)(),
subject_case: Rule = lambda: Rule(
name="subject-case",
condition="subject is in case value",
rule="always",
value=[
"sentence-case",
"start-case",
"pascal-case",
"upper-case",
],
)(),
subject_empty: Rule = lambda: Rule(
name="subject-empty",
condition="subject is empty",
rule="never",
)(),
subject_full_stop: Rule = lambda: Rule(
name="subject-full-stop",
condition="subject ends with value",
rule="never",
value=".",
)(),
subject_exclamation_mark: Rule = lambda: Rule(
name="subject-exclamation-mark",
condition="subject has exclamation before the : marker",
rule="never",
level=DISABLED,
)(),
body_leading_blank: Rule = lambda: Rule(
name="body-leading-blank",
condition="body begins with blank line",
rule="always",
level=WARNING,
)(),
body_empty: Rule = lambda: Rule(
name="body-empty",
condition="body is empty",
rule="never",
level=DISABLED,
)(),
body_max_line_length: Rule = lambda: Rule(
name="body-max-line-length",
condition="body lines has value or less characters",
rule="always",
value=100,
)(),
footer_leading_blank: Rule = lambda: Rule(
name="footer-leading-blank",
condition="footer begins with blank line",
rule="always",
level=WARNING,
)(),
footer_empty: Rule = lambda: Rule(
name="footer-empty",
condition="footer is empty",
rule="never",
level=DISABLED,
)(),
footer_max_line_length: Rule = lambda: Rule(
name="footer-max-line-length",
condition="footer lines has value or less characters",
rule="always",
value=100,
)(),
type_max_length: Rule = lambda: Rule(
name="type-max-length",
condition="type has value or less characters",
rule="always",
value=float("inf"),
)(),
type_min_length: Rule = lambda: Rule(
name="type-min-length",
condition="type has value or more characters",
rule="always",
value=0,
)(),
scope_max_length: Rule = lambda: Rule(
name="scope-max-length",
condition="scope has value or less characters",
rule="always",
value=float("inf"),
)(),
scope_min_length: Rule = lambda: Rule(
name="scope-min-length",
condition="scope has value or more characters",
rule="always",
value=0,
)(),
subject_max_length: Rule = lambda: Rule(
name="subject-max-length",
condition="subject has value or less characters",
rule="always",
value=float("inf"),
)(),
subject_min_length: Rule = lambda: Rule(
name="subject-min-length",
condition="subject has value or more characters",
rule="always",
value=0,
)(),
body_max_length: Rule = lambda: Rule(
name="body-max-length",
condition="body has value or less characters",
rule="always",
value=float("inf"),
)(),
body_min_length: Rule = lambda: Rule(
name="body-min-length",
condition="body has value or more characters",
rule="always",
value=0,
)(),
body_case: Rule = lambda: Rule(
name="body-case",
condition="body is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)(),
body_full_stop: Rule = lambda: Rule(
name="body-full-stop",
condition="body ends with value",
rule="never",
value=".",
level=DISABLED,
)(),
references_empty: Rule = lambda: Rule(
name="references-empty",
condition="references has at least one entry",
rule="never",
level=DISABLED,
)(),
signed_off_by: Rule = lambda: Rule(
name="signed-off-by",
condition="message has value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)(),
trailer_exists: Rule = lambda: Rule(
name="trailer-exists",
condition="message has trailer value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)(),
footer_max_length: Rule = lambda: Rule(
name="footer-max-length",
condition="footer has value or less characters",
rule="always",
value=float("inf"),
)(),
footer_min_length: Rule = lambda: Rule(
name="footer-min-length",
condition="footer has value or more characters",
rule="always",
value=0,
)(),
) -> None
header_max_length
class-attribute
instance-attribute
header_max_length: Rule = field(
default_factory=lambda: Rule(
name="header-max-length",
condition="header has value or less characters",
rule="always",
value=100,
level=ERROR,
)
)
header_min_length
class-attribute
instance-attribute
header_min_length: Rule = field(
default_factory=lambda: Rule(
name="header-min-length",
condition="header has value or more characters",
rule="always",
value=0,
)
)
header_case
class-attribute
instance-attribute
header_case: Rule = field(
default_factory=lambda: Rule(
name="header-case",
condition="header is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)
)
header_full_stop
class-attribute
instance-attribute
header_full_stop: Rule = field(
default_factory=lambda: Rule(
name="header-full-stop",
condition="header ends with value",
rule="never",
value=".",
)
)
header_trim
class-attribute
instance-attribute
header_trim: Rule = field(
default_factory=lambda: Rule(
name="header-trim",
condition="header must not have initial and/or trailing whitespaces",
rule="always",
)
)
type_enum
class-attribute
instance-attribute
type_enum: Rule = field(
default_factory=lambda: Rule(
name="type-enum",
condition="type is found in value",
rule="always",
value=[],
)
)
type_case
class-attribute
instance-attribute
type_case: Rule = field(
default_factory=lambda: Rule(
name="type-case",
condition="type is in case value",
rule="always",
value="lower-case",
)
)
type_empty
class-attribute
instance-attribute
type_empty: Rule = field(
default_factory=lambda: Rule(
name="type-empty",
condition="type is empty",
rule="never",
)
)
scope_enum
class-attribute
instance-attribute
scope_enum: Rule = field(
default_factory=lambda: Rule(
name="scope-enum",
condition="scope is found in value",
rule="always",
value=[],
level=DISABLED,
)
)
scope_case
class-attribute
instance-attribute
scope_case: Rule = field(
default_factory=lambda: Rule(
name="scope-case",
condition="scope is in case value",
rule="always",
value="lower-case",
)
)
scope_empty
class-attribute
instance-attribute
scope_empty: Rule = field(
default_factory=lambda: Rule(
name="scope-empty",
condition="scope is empty",
rule="never",
level=DISABLED,
)
)
subject_case
class-attribute
instance-attribute
subject_case: Rule = field(
default_factory=lambda: Rule(
name="subject-case",
condition="subject is in case value",
rule="always",
value=[
"sentence-case",
"start-case",
"pascal-case",
"upper-case",
],
)
)
subject_empty
class-attribute
instance-attribute
subject_empty: Rule = field(
default_factory=lambda: Rule(
name="subject-empty",
condition="subject is empty",
rule="never",
)
)
subject_full_stop
class-attribute
instance-attribute
subject_full_stop: Rule = field(
default_factory=lambda: Rule(
name="subject-full-stop",
condition="subject ends with value",
rule="never",
value=".",
)
)
subject_exclamation_mark
class-attribute
instance-attribute
subject_exclamation_mark: Rule = field(
default_factory=lambda: Rule(
name="subject-exclamation-mark",
condition="subject has exclamation before the : marker",
rule="never",
level=DISABLED,
)
)
body_leading_blank
class-attribute
instance-attribute
body_leading_blank: Rule = field(
default_factory=lambda: Rule(
name="body-leading-blank",
condition="body begins with blank line",
rule="always",
level=WARNING,
)
)
body_empty
class-attribute
instance-attribute
body_empty: Rule = field(
default_factory=lambda: Rule(
name="body-empty",
condition="body is empty",
rule="never",
level=DISABLED,
)
)
body_max_line_length
class-attribute
instance-attribute
body_max_line_length: Rule = field(
default_factory=lambda: Rule(
name="body-max-line-length",
condition="body lines has value or less characters",
rule="always",
value=100,
)
)
footer_leading_blank
class-attribute
instance-attribute
footer_leading_blank: Rule = field(
default_factory=lambda: Rule(
name="footer-leading-blank",
condition="footer begins with blank line",
rule="always",
level=WARNING,
)
)
footer_empty
class-attribute
instance-attribute
footer_empty: Rule = field(
default_factory=lambda: Rule(
name="footer-empty",
condition="footer is empty",
rule="never",
level=DISABLED,
)
)
footer_max_line_length
class-attribute
instance-attribute
footer_max_line_length: Rule = field(
default_factory=lambda: Rule(
name="footer-max-line-length",
condition="footer lines has value or less characters",
rule="always",
value=100,
)
)
type_max_length
class-attribute
instance-attribute
type_max_length: Rule = field(
default_factory=lambda: Rule(
name="type-max-length",
condition="type has value or less characters",
rule="always",
value=float("inf"),
)
)
type_min_length
class-attribute
instance-attribute
type_min_length: Rule = field(
default_factory=lambda: Rule(
name="type-min-length",
condition="type has value or more characters",
rule="always",
value=0,
)
)
scope_max_length
class-attribute
instance-attribute
scope_max_length: Rule = field(
default_factory=lambda: Rule(
name="scope-max-length",
condition="scope has value or less characters",
rule="always",
value=float("inf"),
)
)
scope_min_length
class-attribute
instance-attribute
scope_min_length: Rule = field(
default_factory=lambda: Rule(
name="scope-min-length",
condition="scope has value or more characters",
rule="always",
value=0,
)
)
subject_max_length
class-attribute
instance-attribute
subject_max_length: Rule = field(
default_factory=lambda: Rule(
name="subject-max-length",
condition="subject has value or less characters",
rule="always",
value=float("inf"),
)
)
subject_min_length
class-attribute
instance-attribute
subject_min_length: Rule = field(
default_factory=lambda: Rule(
name="subject-min-length",
condition="subject has value or more characters",
rule="always",
value=0,
)
)
body_max_length
class-attribute
instance-attribute
body_max_length: Rule = field(
default_factory=lambda: Rule(
name="body-max-length",
condition="body has value or less characters",
rule="always",
value=float("inf"),
)
)
body_min_length
class-attribute
instance-attribute
body_min_length: Rule = field(
default_factory=lambda: Rule(
name="body-min-length",
condition="body has value or more characters",
rule="always",
value=0,
)
)
body_case
class-attribute
instance-attribute
body_case: Rule = field(
default_factory=lambda: Rule(
name="body-case",
condition="body is in case value",
rule="always",
value="lower-case",
level=DISABLED,
)
)
body_full_stop
class-attribute
instance-attribute
body_full_stop: Rule = field(
default_factory=lambda: Rule(
name="body-full-stop",
condition="body ends with value",
rule="never",
value=".",
level=DISABLED,
)
)
references_empty
class-attribute
instance-attribute
references_empty: Rule = field(
default_factory=lambda: Rule(
name="references-empty",
condition="references has at least one entry",
rule="never",
level=DISABLED,
)
)
signed_off_by
class-attribute
instance-attribute
signed_off_by: Rule = field(
default_factory=lambda: Rule(
name="signed-off-by",
condition="message has value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)
)
trailer_exists
class-attribute
instance-attribute
trailer_exists: Rule = field(
default_factory=lambda: Rule(
name="trailer-exists",
condition="message has trailer value",
rule="always",
value="Signed-off-by:",
level=DISABLED,
)
)
footer_max_length
class-attribute
instance-attribute
footer_max_length: Rule = field(
default_factory=lambda: Rule(
name="footer-max-length",
condition="footer has value or less characters",
rule="always",
value=float("inf"),
)
)
footer_min_length
class-attribute
instance-attribute
footer_min_length: Rule = field(
default_factory=lambda: Rule(
name="footer-min-length",
condition="footer has value or more characters",
rule="always",
value=0,
)
)
from_dict
classmethod
from_dict(
config_dict: dict[str, Any],
config_loader: ConfigLoader | None = None,
) -> CommitLintConfig
Create a CommitLintConfig from a dictionary.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
config_dict
|
dict[str, Any]
|
Configuration dictionary to parse |
required |
config_loader
|
ConfigLoader | None
|
Optional ConfigLoader instance for retrieving additional configuration |
None
|
Returns:
| Name | Type | Description |
|---|---|---|
CommitLintConfig |
CommitLintConfig
|
Configured instance |
Source code in src/codemap/git/commit_linter/config.py
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 | |
get_all_rules
get_all_rules() -> list[Rule]
Get all rules as a list.
Source code in src/codemap/git/commit_linter/config.py
445 446 447 448 449 450 451 | |
constants
Constants for commit linting.
HEADER_MAX_LENGTH
module-attribute
HEADER_MAX_LENGTH = DEFAULT_CONFIG["commit"]["convention"][
"max_length"
]
BODY_MAX_LENGTH
module-attribute
BODY_MAX_LENGTH = DEFAULT_CONFIG["commit"]["lint"][
"body_max_line_length"
]["value"]
FOOTER_DETECTION_MIN_LINES
module-attribute
FOOTER_DETECTION_MIN_LINES = 2
FOOTER_MIN_LINE_INDEX
module-attribute
FOOTER_MIN_LINE_INDEX = 2
MIN_BODY_LINE_INDEX
module-attribute
MIN_BODY_LINE_INDEX = 2
ASCII_MAX_VALUE
module-attribute
ASCII_MAX_VALUE = 127
COMMIT_REGEX
module-attribute
COMMIT_REGEX = compile(
"^(?P<type>[a-zA-Z]+)(?:\\((?P<scope>[a-zA-Z0-9\\-_]*(?:/[a-zA-Z0-9\\-_]*)?)\\))?(?P<breaking>!)?: (?P<description>.+?)(?:\\r?\\n\\r?\\n(?P<body_and_footers>.*))?$",
DOTALL | MULTILINE | IGNORECASE,
)
FOOTER_REGEX
module-attribute
FOOTER_REGEX = compile(
"^(?P<token>(?:BREAKING[ -]CHANGE)|(?:[A-Z][A-Z0-9\\-]+))(?P<separator>: | #)(?P<value_part>.*)",
MULTILINE | DOTALL,
)
POTENTIAL_FOOTER_TOKEN_REGEX
module-attribute
POTENTIAL_FOOTER_TOKEN_REGEX = compile(
"^([A-Za-z][A-Za-z0-9\\-]+|[Bb][Rr][Ee][Aa][Kk][Ii][Nn][Gg][ -][Cc][Hh][Aa][Nn][Gg][Ee])(: | #)",
MULTILINE,
)
BREAKING_CHANGE
module-attribute
BREAKING_CHANGE = 'BREAKING CHANGE'
BREAKING_CHANGE_HYPHEN
module-attribute
BREAKING_CHANGE_HYPHEN = 'BREAKING-CHANGE'
VALID_FOOTER_TOKEN_REGEX
module-attribute
VALID_FOOTER_TOKEN_REGEX = compile(
"^(?:[A-Z][A-Z0-9\\-]+|BREAKING[ -]CHANGE)$"
)
VALID_TYPE_REGEX
module-attribute
VALID_TYPE_REGEX = compile('^[a-zA-Z]+$')
VALID_SCOPE_REGEX
module-attribute
VALID_SCOPE_REGEX = compile(
"^[a-zA-Z0-9\\-_]*(?:/[a-zA-Z0-9\\-_]*)*$"
)
BREAKING_CHANGE_REGEX
module-attribute
BREAKING_CHANGE_REGEX = compile(
"^breaking[ -]change$", IGNORECASE
)
CASE_FORMATS
module-attribute
CASE_FORMATS = {
"lower-case": lambda s: lower() == s,
"upper-case": lambda s: upper() == s,
"camel-case": lambda s: s
and islower()
and " " not in s
and "-" not in s
and "_" not in s,
"kebab-case": lambda s: lower() == s
and "-" in s
and " " not in s
and "_" not in s,
"pascal-case": lambda s: s
and isupper()
and " " not in s
and "-" not in s
and "_" not in s,
"sentence-case": lambda s: s
and isupper()
and lower() == s[1:],
"snake-case": lambda s: lower() == s
and "_" in s
and " " not in s
and "-" not in s,
"start-case": lambda s: all(
isupper() for w in split() if w
),
}
validators
Validators for commit message components.
CommitValidators
Collection of validator methods for different parts of commit messages.
Source code in src/codemap/git/commit_linter/validators.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | |
validate_footer_token
staticmethod
validate_footer_token(token: str) -> bool
Validate a footer token according to the Conventional Commits spec.
According to the spec:
1. Tokens MUST use hyphens instead of spaces.
2. BREAKING CHANGE must be uppercase.
3. Footer tokens should be ALL UPPERCASE.
4. Footer tokens should follow format with - for spaces.
5. No special characters or Unicode (non-ASCII) characters allowed.
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if token is valid, False otherwise |
Source code in src/codemap/git/commit_linter/validators.py
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | |
validate_type_and_scope
staticmethod
validate_type_and_scope(
type_value: str, scope_value: str | None
) -> list[str]
Validate type and scope values according to the spec.
Type must contain only letters. Scope must contain only letters, numbers, hyphens, underscores, and slashes. Both must be ASCII-only.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
type_value
|
str
|
The commit message type |
required |
scope_value
|
str | None
|
The optional scope |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
list[str]: List of error messages, empty if valid |
Source code in src/codemap/git/commit_linter/validators.py
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | |
validate_case
staticmethod
validate_case(
text: str, case_format: str | list[str]
) -> bool
Validate if the text follows the specified case format.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str
|
The text to validate |
required |
case_format
|
str or list
|
The case format(s) to check |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text matches any of the specified case formats |
Source code in src/codemap/git/commit_linter/validators.py
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | |
validate_length
staticmethod
validate_length(
text: str | None, min_length: int, max_length: float
) -> bool
Validate if text length is between min and max length.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate, or None |
required |
min_length
|
int
|
Minimum allowed length |
required |
max_length
|
int | float
|
Maximum allowed length |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text length is valid, False otherwise |
Source code in src/codemap/git/commit_linter/validators.py
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | |
validate_enum
staticmethod
validate_enum(text: str, allowed_values: list[str]) -> bool
Validate if text is in the allowed values.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str
|
The text to validate |
required |
allowed_values
|
list
|
The allowed values |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text is in allowed values, False otherwise |
Source code in src/codemap/git/commit_linter/validators.py
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 | |
validate_empty
staticmethod
validate_empty(
text: str | None, should_be_empty: bool
) -> bool
Validate if text is empty or not based on configuration.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
should_be_empty
|
bool
|
True if text should be empty, False if not |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text empty status matches should_be_empty |
Source code in src/codemap/git/commit_linter/validators.py
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | |
validate_ends_with
staticmethod
validate_ends_with(
text: str | None, suffix: str, should_end_with: bool
) -> bool
Validate if text ends with a specific suffix.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
suffix
|
str
|
The suffix to check for |
required |
should_end_with
|
bool
|
True if text should end with suffix |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if whether the text ends with the suffix matches should_end_with |
Source code in src/codemap/git/commit_linter/validators.py
173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 | |
validate_starts_with
staticmethod
validate_starts_with(
text: str | None, prefix: str, should_start_with: bool
) -> bool
Validate if text starts with a specific prefix.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
prefix
|
str
|
The prefix to check for |
required |
should_start_with
|
bool
|
True if text should start with prefix |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if whether the text starts with the prefix matches should_start_with |
Source code in src/codemap/git/commit_linter/validators.py
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | |
validate_line_length
staticmethod
validate_line_length(
text: str | None, max_line_length: float
) -> list[int]
Validate line lengths in multiline text.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
max_line_length
|
int | float
|
Maximum allowed line length |
required |
Returns:
| Name | Type | Description |
|---|---|---|
list |
list[int]
|
List of line numbers with errors (0-indexed) |
Source code in src/codemap/git/commit_linter/validators.py
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 | |
validate_leading_blank
staticmethod
validate_leading_blank(
text: str | None, required_blank: bool
) -> bool
Validate if text starts with a blank line.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
required_blank
|
bool
|
True if text should start with blank line |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text leading blank matches expectation |
Source code in src/codemap/git/commit_linter/validators.py
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 | |
validate_trim
staticmethod
validate_trim(text: str | None) -> bool
Validate if text has no leading/trailing whitespace.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if text has no leading/trailing whitespace |
Source code in src/codemap/git/commit_linter/validators.py
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 | |
validate_contains
staticmethod
validate_contains(
text: str | None, substring: str, should_contain: bool
) -> bool
Validate if text contains a specific substring.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str | None
|
The text to validate |
required |
substring
|
str
|
The substring to check for |
required |
should_contain
|
bool
|
True if text should contain substring |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
True if the presence of substring in text matches should_contain |
Source code in src/codemap/git/commit_linter/validators.py
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | |
semantic_grouping
Semantic grouping implementation for the CodeMap project.
This module provides functionality to group related diff chunks into semantic groups for more meaningful commit messages.
batch_generate_messages
batch_generate_messages(
groups: list[SemanticGroup],
prompt_template: str,
config_loader: ConfigLoader,
model: str | None = None,
) -> list[SemanticGroup]
Generate commit messages for multiple semantic groups in batch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
groups
|
list[SemanticGroup]
|
List of SemanticGroup objects |
required |
prompt_template
|
str
|
Template to use for prompt generation |
required |
config_loader
|
ConfigLoader
|
ConfigLoader instance |
required |
model
|
str | None
|
Optional model name override |
None
|
Returns:
| Type | Description |
|---|---|
list[SemanticGroup]
|
List of SemanticGroup objects with messages added |
Raises:
| Type | Description |
|---|---|
LLMError
|
If batch processing fails |
Source code in src/codemap/git/semantic_grouping/batch_processor.py
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | |
DiffClusterer
Clusters diff chunks based on their semantic embeddings.
This class provides methods to group related code changes by their semantic similarity, using vector embeddings and standard clustering algorithms from scikit-learn.
Clustering helps identify code changes that are related to each other and should be grouped in the same commit, even if they appear in different files.
The class supports multiple clustering algorithms:
1. Agglomerative clustering: Hierarchical clustering that's good for finding natural groupings without needing to specify the exact number of clusters
2. DBSCAN: Density-based clustering that can identify outliers and works well with irregularly shaped clusters
Source code in src/codemap/git/semantic_grouping/clusterer.py
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | |
__init__
__init__(
method: str = "agglomerative", **kwargs: object
) -> None
Initialize the clusterer.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method
|
str
|
Clustering method to use. Options: "agglomerative" for hierarchical clustering (default), or "dbscan" for density-based spatial clustering. |
'agglomerative'
|
**kwargs
|
object
|
Additional parameters for the clustering algorithm. For agglomerative: distance_threshold, linkage, etc. For DBSCAN: eps, min_samples, etc. |
{}
|
Raises:
| Type | Description |
|---|---|
ImportError
|
If scikit-learn is not installed |
Source code in src/codemap/git/semantic_grouping/clusterer.py
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | |
method
instance-attribute
method = method
kwargs
instance-attribute
kwargs = kwargs
AgglomerativeClustering
instance-attribute
AgglomerativeClustering = AgglomerativeClustering
DBSCAN
instance-attribute
DBSCAN = DBSCAN
cosine_similarity
instance-attribute
cosine_similarity = cosine_similarity
cluster
Cluster chunks based on their embeddings.
Process:
1. Extracts chunks and embeddings from input tuples
2. Computes a similarity matrix using cosine similarity
3. Converts similarity to distance matrix (1 - similarity)
4. Applies clustering algorithm based on the chosen method
5. Organizes chunks into clusters based on labels
6. Handles special cases like noise points in DBSCAN
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk_embeddings
|
list[tuple[DiffChunk, ndarray]]
|
List of (chunk, embedding) tuples where each embedding is a numpy array representing the semantic vector of a code chunk |
required |
Returns:
| Type | Description |
|---|---|
list[list[DiffChunk]]
|
List of lists, where each inner list contains chunks in the same cluster. |
list[list[DiffChunk]]
|
With DBSCAN, noise points (label -1) are returned as individual single-item clusters. |
Examples:
>>> embedder = DiffEmbedder()
>>> chunk_embeddings = embedder.embed_chunks(diff_chunks)
>>> clusterer = DiffClusterer(method="agglomerative", distance_threshold=0.5)
>>> clusters = clusterer.cluster(chunk_embeddings)
>>> for i, cluster in enumerate(clusters):
... print(f"Cluster {i} has {len(cluster)} chunks")
Source code in src/codemap/git/semantic_grouping/clusterer.py
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | |
format_chunk
format_chunk(chunk: DiffChunk) -> str
Format a single diff chunk as markdown.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
The diff chunk to format |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown string |
Source code in src/codemap/git/semantic_grouping/context_processor.py
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | |
prioritize_chunks
Prioritize chunks based on heuristics (file types, changes, etc.).
This is a simple implementation that could be extended with more sophisticated dissimilarity metrics.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of chunks to prioritize |
required |
max_count
|
int
|
Maximum number of chunks to return |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
Prioritized list of chunks |
Source code in src/codemap/git/semantic_grouping/context_processor.py
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | |
process_chunks_with_lod
process_chunks_with_lod(
chunks: list[DiffChunk],
max_tokens: int = DEFAULT_MAX_TOKENS,
) -> str
Process diff chunks using LOD to create optimized context for LLM prompts.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of diff chunks to process |
required |
max_tokens
|
int
|
Maximum tokens allowed in the formatted context |
DEFAULT_MAX_TOKENS
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown context optimized for token usage |
Source code in src/codemap/git/semantic_grouping/context_processor.py
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | |
DiffEmbedder
Generates embeddings for diff chunks.
Source code in src/codemap/git/semantic_grouping/embedder.py
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | |
__init__
__init__(model_name: str = 'all-MiniLM-L6-v2') -> None
Initialize the embedder with a specific model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
model_name
|
str
|
Name of the sentence-transformers model to use |
'all-MiniLM-L6-v2'
|
Source code in src/codemap/git/semantic_grouping/embedder.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | |
model
instance-attribute
model = SentenceTransformer(model_name)
preprocess_diff
preprocess_diff(diff_text: str) -> str
Preprocess diff text to make it more suitable for embedding.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff_text
|
str
|
Raw diff text |
required |
Returns:
| Type | Description |
|---|---|
str
|
Preprocessed text |
Source code in src/codemap/git/semantic_grouping/embedder.py
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | |
embed_chunk
embed_chunk(chunk: DiffChunk) -> ndarray
Generate an embedding for a diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
DiffChunk object |
required |
Returns:
| Type | Description |
|---|---|
ndarray
|
numpy.ndarray: Embedding vector |
Source code in src/codemap/git/semantic_grouping/embedder.py
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | |
embed_chunks
Generate embeddings for multiple chunks.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of DiffChunk objects |
required |
Returns:
| Type | Description |
|---|---|
list[tuple[DiffChunk, ndarray]]
|
List of (chunk, embedding) tuples |
Source code in src/codemap/git/semantic_grouping/embedder.py
85 86 87 88 89 90 91 92 93 94 95 96 | |
FileIntegrityResolver
Resolves file integrity constraints for semantic groups.
File integrity refers to the requirement that all changes to a specific file should be included in the same commit, even if they are semantically different. This prevents fragmented changes to the same file across multiple commits, which can lead to broken builds or inconsistent states.
The resolver works by:
1. Identifying files that appear in multiple semantic groups
2. Calculating the semantic similarity between these overlapping groups
3. Either merging similar groups or reassigning chunks from less relevant groups to the most appropriate group
This process ensures that each file is modified in exactly one commit, while still maintaining semantic coherence within commits when possible.
Source code in src/codemap/git/semantic_grouping/resolver.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | |
__init__
__init__(similarity_threshold: float = 0.6) -> None
Initialize the resolver.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
similarity_threshold
|
float
|
Threshold for group similarity to trigger merging (0.0-1.0). Higher values require greater similarity to merge groups: values near 0.5 are permissive and will merge moderately related groups; values above 0.7 are strict and will mostly reassign chunks instead of merging; the default of 0.6 provides a balanced approach. |
0.6
|
Raises:
| Type | Description |
|---|---|
ImportError
|
If scikit-learn is not installed |
Source code in src/codemap/git/semantic_grouping/resolver.py
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | |
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
cosine_similarity
instance-attribute
cosine_similarity = cosine_similarity
calculate_group_similarity
calculate_group_similarity(
group1: SemanticGroup,
group2: SemanticGroup,
chunk_embeddings: dict[DiffChunk, ndarray],
) -> float
Calculate similarity between two groups based on their chunks' embeddings.
This method computes the average pairwise cosine similarity between all combinations of chunks from the two groups. The similarity is based on the semantic embeddings of the chunks' content.
Process:
1. Extract embeddings for all chunks in both groups
2. Compute pairwise cosine similarities between each pair of chunks
3. Return the average similarity score
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
group1
|
SemanticGroup
|
First semantic group to compare |
required |
group2
|
SemanticGroup
|
Second semantic group to compare |
required |
chunk_embeddings
|
dict[DiffChunk, ndarray]
|
Dict mapping chunks to their embeddings |
required |
Returns:
| Name | Type | Description |
|---|---|---|
float |
float
|
Similarity score between 0 and 1, where 0 indicates completely unrelated changes, 1 indicates identical or extremely similar changes, and values around 0.6-0.8 typically indicate related functionality. |
Source code in src/codemap/git/semantic_grouping/resolver.py
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | |
resolve_violations
resolve_violations(
groups: list[SemanticGroup],
chunk_embeddings: dict[DiffChunk, ndarray],
) -> list[SemanticGroup]
Resolve file integrity violations by merging or reassigning chunks.
A violation occurs when the same file appears in multiple semantic groups. This needs to be resolved because a file should be modified in only one commit.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
groups
|
list[SemanticGroup]
|
List of SemanticGroup objects to resolve |
required |
chunk_embeddings
|
dict[DiffChunk, ndarray]
|
Dict mapping chunks to their embeddings |
required |
Returns:
| Type | Description |
|---|---|
list[SemanticGroup]
|
List of SemanticGroup objects with all violations resolved |
Source code in src/codemap/git/semantic_grouping/resolver.py
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | |
SemanticGroup
dataclass
A semantic group of related diff chunks.
This class represents a group of related diff chunks that should be committed together because they are semantically related.
Source code in src/codemap/git/semantic_grouping/__init__.py
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | |
__init__
__init__(
chunks: list[DiffChunk] = list(),
files: list[str] = list(),
content: str = "",
message: str | None = None,
approved: bool = False,
embedding: list[float] | None = None,
) -> None
files
class-attribute
instance-attribute
files: list[str] = field(default_factory=list)
content
class-attribute
instance-attribute
content: str = ''
message
class-attribute
instance-attribute
message: str | None = None
approved
class-attribute
instance-attribute
approved: bool = False
embedding
class-attribute
instance-attribute
embedding: list[float] | None = None
__post_init__
__post_init__() -> None
Initialize files and content from chunks if not provided.
Source code in src/codemap/git/semantic_grouping/__init__.py
51 52 53 54 55 56 57 58 59 60 61 62 | |
merge_with
merge_with(other: SemanticGroup) -> SemanticGroup
Merge this group with another group.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
SemanticGroup
|
Another SemanticGroup to merge with |
required |
Returns:
| Type | Description |
|---|---|
SemanticGroup
|
A new SemanticGroup containing chunks from both groups |
Source code in src/codemap/git/semantic_grouping/__init__.py
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | |
batch_processor
Batch processing for semantic groups commit message generation.
This module provides functionality to generate commit messages for multiple semantic groups in batch using LiteLLM's batch_completion.
logger
module-attribute
logger = getLogger(__name__)
batch_generate_messages
batch_generate_messages(
groups: list[SemanticGroup],
prompt_template: str,
config_loader: ConfigLoader,
model: str | None = None,
) -> list[SemanticGroup]
Generate commit messages for multiple semantic groups in batch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
groups
|
list[SemanticGroup]
|
List of SemanticGroup objects |
required |
prompt_template
|
str
|
Template to use for prompt generation |
required |
config_loader
|
ConfigLoader
|
ConfigLoader instance |
required |
model
|
str | None
|
Optional model name override |
None
|
Returns:
| Type | Description |
|---|---|
list[SemanticGroup]
|
List of SemanticGroup objects with messages added |
Raises:
| Type | Description |
|---|---|
LLMError
|
If batch processing fails |
Source code in src/codemap/git/semantic_grouping/batch_processor.py
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | |
resolver
Module for resolving file integrity constraints in semantic groups.
This module provides functionality for ensuring that changes to the same file are kept in the same commit, even when semantic clustering might separate them. This ensures that file integrity is maintained during the commit process.
Key components:
- FileIntegrityResolver: Main class that analyzes file overlaps between semantic groups and decides whether to merge groups or reassign chunks to maintain file integrity
The resolution process involves:
1. Detecting violations (files that appear in multiple semantic groups)
2. Calculating semantic similarity between groups with overlapping files
3. Deciding whether to merge groups (if sufficiently similar) or reassign chunks
4. Iteratively resolving violations until all files are in exactly one group
logger
module-attribute
logger = getLogger(__name__)
FileIntegrityResolver
Resolves file integrity constraints for semantic groups.
File integrity refers to the requirement that all changes to a specific file should be included in the same commit, even if they are semantically different. This prevents fragmented changes to the same file across multiple commits, which can lead to broken builds or inconsistent states.
The resolver works by:
1. Identifying files that appear in multiple semantic groups
2. Calculating the semantic similarity between these overlapping groups
3. Either merging similar groups or reassigning chunks from less relevant groups to the most appropriate group
This process ensures that each file is modified in exactly one commit, while still maintaining semantic coherence within commits when possible.
Source code in src/codemap/git/semantic_grouping/resolver.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | |
__init__
__init__(similarity_threshold: float = 0.6) -> None
Initialize the resolver.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
similarity_threshold
|
float
|
Threshold for group similarity to trigger merging (0.0-1.0). Higher values require greater similarity to merge groups: values near 0.5 are permissive and will merge moderately related groups; values above 0.7 are strict and will mostly reassign chunks instead of merging; the default of 0.6 provides a balanced approach. |
0.6
|
Raises:
| Type | Description |
|---|---|
ImportError
|
If scikit-learn is not installed |
Source code in src/codemap/git/semantic_grouping/resolver.py
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | |
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
cosine_similarity
instance-attribute
cosine_similarity = cosine_similarity
calculate_group_similarity
calculate_group_similarity(
group1: SemanticGroup,
group2: SemanticGroup,
chunk_embeddings: dict[DiffChunk, ndarray],
) -> float
Calculate similarity between two groups based on their chunks' embeddings.
This method computes the average pairwise cosine similarity between all combinations of chunks from the two groups. The similarity is based on the semantic embeddings of the chunks' content.
Process:
1. Extract embeddings for all chunks in both groups
2. Compute pairwise cosine similarities between each pair of chunks
3. Return the average similarity score
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
group1
|
SemanticGroup
|
First semantic group to compare |
required |
group2
|
SemanticGroup
|
Second semantic group to compare |
required |
chunk_embeddings
|
dict[DiffChunk, ndarray]
|
Dict mapping chunks to their embeddings |
required |
Returns:
| Name | Type | Description |
|---|---|---|
float |
float
|
Similarity score between 0 and 1, where 0 indicates completely unrelated changes, 1 indicates identical or extremely similar changes, and values around 0.6-0.8 typically indicate related functionality. |
Source code in src/codemap/git/semantic_grouping/resolver.py
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | |
resolve_violations
resolve_violations(
groups: list[SemanticGroup],
chunk_embeddings: dict[DiffChunk, ndarray],
) -> list[SemanticGroup]
Resolve file integrity violations by merging or reassigning chunks.
A violation occurs when the same file appears in multiple semantic groups. This needs to be resolved because a file should be modified in only one commit.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
groups
|
list[SemanticGroup]
|
List of SemanticGroup objects to resolve |
required |
chunk_embeddings
|
dict[DiffChunk, ndarray]
|
Dict mapping chunks to their embeddings |
required |
Returns:
| Type | Description |
|---|---|
list[SemanticGroup]
|
List of SemanticGroup objects with all violations resolved |
Source code in src/codemap/git/semantic_grouping/resolver.py
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | |
context_processor
Context processing utilities for LLM prompts.
This module provides functionality to process and format code contexts for LLM prompts using tree-sitter analysis and Level of Detail (LOD) to optimize context length while preserving meaningful content.
logger
module-attribute
logger = getLogger(__name__)
DEFAULT_MAX_TOKENS
module-attribute
DEFAULT_MAX_TOKENS = 4000
CHUNK_TOKEN_ESTIMATE
module-attribute
CHUNK_TOKEN_ESTIMATE = 500
MAX_CHUNKS
module-attribute
MAX_CHUNKS = 6
MAX_SIMPLE_CHUNKS
module-attribute
MAX_SIMPLE_CHUNKS = 3
process_chunks_with_lod
process_chunks_with_lod(
chunks: list[DiffChunk],
max_tokens: int = DEFAULT_MAX_TOKENS,
) -> str
Process diff chunks using LOD to create optimized context for LLM prompts.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of diff chunks to process |
required |
max_tokens
|
int
|
Maximum tokens allowed in the formatted context |
DEFAULT_MAX_TOKENS
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown context optimized for token usage |
Source code in src/codemap/git/semantic_grouping/context_processor.py
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | |
prioritize_chunks
Prioritize chunks based on heuristics (file types, changes, etc.).
This is a simple implementation that could be extended with more sophisticated dissimilarity metrics.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of chunks to prioritize |
required |
max_count
|
int
|
Maximum number of chunks to return |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
Prioritized list of chunks |
Source code in src/codemap/git/semantic_grouping/context_processor.py
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | |
get_file_paths_from_chunk
get_file_paths_from_chunk(chunk: DiffChunk) -> list[str]
Extract file paths from a diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
The diff chunk to process |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
List of file paths |
Source code in src/codemap/git/semantic_grouping/context_processor.py
153 154 155 156 157 158 159 160 161 162 163 164 | |
format_lod_entity
Format an LOD entity as GitHub-flavored markdown.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
entity
|
LODEntity
|
The LOD entity to format |
required |
file_path
|
str
|
Path to the source file |
required |
level
|
LODLevel
|
LOD level used |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown string |
Source code in src/codemap/git/semantic_grouping/context_processor.py
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | |
format_entity_structure
format_entity_structure(
entity: LODEntity, indent: int
) -> str
Format entity with structure (signatures and hierarchy).
Source code in src/codemap/git/semantic_grouping/context_processor.py
192 193 194 195 196 197 198 199 200 201 202 203 204 205 | |
format_entity_signatures
format_entity_signatures(
entity: LODEntity, indent: int
) -> str
Format entity with just signatures.
Source code in src/codemap/git/semantic_grouping/context_processor.py
208 209 210 211 212 213 214 215 216 217 218 219 220 221 | |
format_regular_chunks
format_regular_chunks(chunks: list[DiffChunk]) -> str
Format chunks using the regular approach when LOD is not necessary.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of chunks to format |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown string |
Source code in src/codemap/git/semantic_grouping/context_processor.py
224 225 226 227 228 229 230 231 232 233 234 235 236 | |
format_chunk
format_chunk(chunk: DiffChunk) -> str
Format a single diff chunk as markdown.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
The diff chunk to format |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted markdown string |
Source code in src/codemap/git/semantic_grouping/context_processor.py
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | |
estimate_tokens
estimate_tokens(text: str) -> int
Estimate the number of tokens in a text.
This is a simple estimation that can be improved with actual tokenizer implementations if needed.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
text
|
str
|
Text to estimate tokens for |
required |
Returns:
| Type | Description |
|---|---|
int
|
Estimated token count |
Source code in src/codemap/git/semantic_grouping/context_processor.py
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | |
truncate_context
truncate_context(context: str, max_tokens: int) -> str
Truncate context to fit within token limit.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
context
|
str
|
Context to truncate |
required |
max_tokens
|
int
|
Maximum allowed tokens |
required |
Returns:
| Type | Description |
|---|---|
str
|
Truncated context |
Source code in src/codemap/git/semantic_grouping/context_processor.py
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 | |
group
Module for semantic grouping of diff chunks.
SemanticGroup
Represents a group of semantically related diff chunks.
Source code in src/codemap/git/semantic_grouping/group.py
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | |
__init__
__init__(
chunks: list[DiffChunk] | None = None,
name: str | None = None,
) -> None
Initialize a semantic group.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk] | None
|
List of DiffChunk objects |
None
|
name
|
str | None
|
Optional name for the group |
None
|
Source code in src/codemap/git/semantic_grouping/group.py
9 10 11 12 13 14 15 16 17 18 19 20 21 | |
chunks
instance-attribute
chunks = chunks or []
name
instance-attribute
name = name
message
instance-attribute
message: str | None = None
approved
instance-attribute
approved = False
files
property
files: list[str]
Get the list of files affected by this group.
content
property
content: str
Get the combined diff content of all chunks.
merge_with
merge_with(other_group: SemanticGroup) -> SemanticGroup
Merge this group with another group.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other_group
|
SemanticGroup
|
Another SemanticGroup to merge with |
required |
Returns:
| Type | Description |
|---|---|
SemanticGroup
|
A new SemanticGroup containing chunks from both groups |
Source code in src/codemap/git/semantic_grouping/group.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 | |
__repr__
__repr__() -> str
Return a string representation of the group with file and chunk counts.
Source code in src/codemap/git/semantic_grouping/group.py
51 52 53 | |
embedder
Module for generating embeddings from diff chunks.
logger
module-attribute
logger = getLogger(__name__)
DiffEmbedder
Generates embeddings for diff chunks.
Source code in src/codemap/git/semantic_grouping/embedder.py
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | |
__init__
__init__(model_name: str = 'all-MiniLM-L6-v2') -> None
Initialize the embedder with a specific model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
model_name
|
str
|
Name of the sentence-transformers model to use |
'all-MiniLM-L6-v2'
|
Source code in src/codemap/git/semantic_grouping/embedder.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | |
model
instance-attribute
model = SentenceTransformer(model_name)
preprocess_diff
preprocess_diff(diff_text: str) -> str
Preprocess diff text to make it more suitable for embedding.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff_text
|
str
|
Raw diff text |
required |
Returns:
| Type | Description |
|---|---|
str
|
Preprocessed text |
Source code in src/codemap/git/semantic_grouping/embedder.py
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | |
embed_chunk
embed_chunk(chunk: DiffChunk) -> ndarray
Generate an embedding for a diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
DiffChunk object |
required |
Returns:
| Type | Description |
|---|---|
ndarray
|
Embedding vector as a numpy.ndarray |
Source code in src/codemap/git/semantic_grouping/embedder.py
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | |
embed_chunks
embed_chunks(
chunks: list[DiffChunk],
) -> list[tuple[DiffChunk, ndarray]]
Generate embeddings for multiple chunks.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of DiffChunk objects |
required |
Returns:
| Type | Description |
|---|---|
list[tuple[DiffChunk, ndarray]]
|
List of (chunk, embedding) tuples |
Source code in src/codemap/git/semantic_grouping/embedder.py
85 86 87 88 89 90 91 92 93 94 95 96 | |
clusterer
Module for clustering diff chunks based on their embeddings.
This module provides functionality to group related code changes together based on their semantic similarity, using vector embeddings and clustering algorithms. The clustering process helps identify related changes that should be committed together.
Key components:
- DiffClusterer: Main class that implements clustering algorithms for diff chunks
- ClusteringParams: Type definition for parameters used by clustering algorithms
The module supports multiple clustering methods:
1. Agglomerative (hierarchical) clustering: Builds a hierarchy of clusters based on distances between embeddings, using a distance threshold to determine final cluster boundaries
2. DBSCAN: Density-based clustering that groups points in high-density regions, treating low-density points as noise/outliers
logger
module-attribute
logger = getLogger(__name__)
ClusteringParams
Bases: TypedDict
Type definition for clustering algorithm parameters.
These parameters configure the behavior of the clustering algorithms:
For agglomerative clustering:
- n_clusters: Optional limit on number of clusters (None means no limit)
- distance_threshold: Maximum distance for clusters to be merged (lower = more clusters)
- metric: Distance metric to use (e.g., "precomputed" for precomputed distance matrix)
- linkage: Strategy for calculating distances between clusters ("average", "single", etc.)
For DBSCAN:
- eps: Maximum distance between points in the same neighborhood
- min_samples: Minimum points required to form a dense region
- metric: Distance metric to use
Source code in src/codemap/git/semantic_grouping/clusterer.py
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | |
n_clusters
instance-attribute
n_clusters: int | None
distance_threshold
instance-attribute
distance_threshold: float | None
metric
instance-attribute
metric: str
linkage
instance-attribute
linkage: str
eps
instance-attribute
eps: float
min_samples
instance-attribute
min_samples: int
T
module-attribute
T = TypeVar('T')
DiffClusterer
Clusters diff chunks based on their semantic embeddings.
This class provides methods to group related code changes by their semantic similarity, using vector embeddings and standard clustering algorithms from scikit-learn.
Clustering helps identify code changes that are related to each other and should be grouped in the same commit, even if they appear in different files.
The class supports multiple clustering algorithms:
1. Agglomerative clustering: Hierarchical clustering that's good for finding natural groupings without needing to specify the exact number of clusters
2. DBSCAN: Density-based clustering that can identify outliers and works well with irregularly shaped clusters
Source code in src/codemap/git/semantic_grouping/clusterer.py
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | |
__init__
__init__(
method: str = "agglomerative", **kwargs: object
) -> None
Initialize the clusterer.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
method
|
str
|
Clustering method to use. Options: "agglomerative" (hierarchical clustering, the default) or "dbscan" (density-based spatial clustering). |
'agglomerative'
|
**kwargs
|
object
|
Additional parameters for the clustering algorithm. For agglomerative: distance_threshold, linkage, etc. For DBSCAN: eps, min_samples, etc. |
{}
|
Raises:
| Type | Description |
|---|---|
ImportError
|
If scikit-learn is not installed |
Source code in src/codemap/git/semantic_grouping/clusterer.py
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | |
method
instance-attribute
method = method
kwargs
instance-attribute
kwargs = kwargs
AgglomerativeClustering
instance-attribute
AgglomerativeClustering = AgglomerativeClustering
DBSCAN
instance-attribute
DBSCAN = DBSCAN
cosine_similarity
instance-attribute
cosine_similarity = cosine_similarity
cluster
cluster(
chunk_embeddings: list[tuple[DiffChunk, ndarray]],
) -> list[list[DiffChunk]]
Cluster chunks based on their embeddings.
Process:
1. Extracts chunks and embeddings from input tuples
2. Computes a similarity matrix using cosine similarity
3. Converts similarity to distance matrix (1 - similarity)
4. Applies clustering algorithm based on the chosen method
5. Organizes chunks into clusters based on labels
6. Handles special cases like noise points in DBSCAN
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk_embeddings
|
list[tuple[DiffChunk, ndarray]]
|
List of (chunk, embedding) tuples where each embedding is a numpy array representing the semantic vector of a code chunk |
required |
Returns:
| Type | Description |
|---|---|
list[list[DiffChunk]]
|
List of lists, where each inner list contains chunks in the same cluster. |
list[list[DiffChunk]]
|
With DBSCAN, noise points (label -1) are returned as individual single-item clusters. |
Examples:
>>> embedder = DiffEmbedder()
>>> chunk_embeddings = embedder.embed_chunks(diff_chunks)
>>> clusterer = DiffClusterer(method="agglomerative", distance_threshold=0.5)
>>> clusters = clusterer.cluster(chunk_embeddings)
>>> for i, cluster in enumerate(clusters):
...     print(f"Cluster {i} has {len(cluster)} chunks")
Source code in src/codemap/git/semantic_grouping/clusterer.py
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | |
pr_generator
PR generation package for CodeMap.
This package provides modules for generating and managing pull requests.
git_operation
git_operation(func: F) -> F
Decorator for git operations.
This decorator wraps functions that perform git operations, providing:
- Logging of operation start/end
- Standardized error handling
- Automatic conversion of git-related exceptions to GitError
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func
|
F
|
The function to decorate |
required |
Returns:
| Type | Description |
|---|---|
F
|
Decorated function |
Source code in src/codemap/git/pr_generator/decorators.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | |
PRGenerator
Generator for Pull Requests.
This class handles generating pull request content (title and description) and creating/updating PRs on GitHub.
Source code in src/codemap/git/pr_generator/generator.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | |
__init__
__init__(repo_path: Path, llm_client: LLMClient) -> None
Initialize the PR generator.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_path
|
Path
|
Path to the git repository |
required |
llm_client
|
LLMClient
|
LLMClient instance to use for content generation |
required |
Source code in src/codemap/git/pr_generator/generator.py
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | |
repo_path
instance-attribute
repo_path = repo_path
client
instance-attribute
client = llm_client
generate_content_from_commits
generate_content_from_commits(
base_branch: str, head_branch: str, use_llm: bool = True
) -> PRContent
Generate PR content (title and description) from commits.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
use_llm
|
bool
|
Whether to use LLM for generation |
True
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' keys |
Source code in src/codemap/git/pr_generator/generator.py
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | |
generate_content_from_template
generate_content_from_template(
branch_name: str,
description: str,
workflow_strategy: str = "github-flow",
) -> PRContent
Generate PR content (title and description) from a template.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
description
|
str
|
Short description of the changes |
required |
workflow_strategy
|
str
|
Git workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' keys |
Source code in src/codemap/git/pr_generator/generator.py
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | |
suggest_branch_name
suggest_branch_name(
description: str, workflow_strategy: str = "github-flow"
) -> str
Suggest a branch name based on a description.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
description
|
str
|
Description of the branch |
required |
workflow_strategy
|
str
|
Git workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/generator.py
108 109 110 111 112 113 114 115 116 117 118 119 120 | |
create_pr
create_pr(
base_branch: str,
head_branch: str,
title: str,
description: str,
) -> PullRequest
Create a pull request on GitHub.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
title
|
str
|
PR title |
required |
description
|
str
|
PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object with PR details |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR creation fails |
Source code in src/codemap/git/pr_generator/generator.py
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | |
update_pr
update_pr(
pr_number: int, title: str, description: str
) -> PullRequest
Update an existing pull request.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
pr_number
|
int
|
PR number |
required |
title
|
str
|
New PR title |
required |
description
|
str
|
New PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
Updated PullRequest object |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR update fails |
Source code in src/codemap/git/pr_generator/generator.py
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | |
get_existing_pr
get_existing_pr(branch_name: str) -> PullRequest | None
Get an existing PR for a branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Branch name |
required |
Returns:
| Type | Description |
|---|---|
PullRequest | None
|
PullRequest object if found, None otherwise |
Source code in src/codemap/git/pr_generator/generator.py
159 160 161 162 163 164 165 166 167 168 169 170 | |
create_or_update_pr
create_or_update_pr(
base_branch: str | None = None,
head_branch: str | None = None,
title: str | None = None,
description: str | None = None,
use_llm: bool = True,
pr_number: int | None = None,
) -> PullRequest
Create a new PR or update an existing one.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str | None
|
Base branch (defaults to default branch) |
None
|
head_branch
|
str | None
|
Head branch |
None
|
title
|
str | None
|
PR title (if None, will be generated) |
None
|
description
|
str | None
|
PR description (if None, will be generated) |
None
|
use_llm
|
bool
|
Whether to use LLM for content generation |
True
|
pr_number
|
int | None
|
PR number for update (if None, will create new PR) |
None
|
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR creation/update fails |
Source code in src/codemap/git/pr_generator/generator.py
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | |
PR_DESCRIPTION_PROMPT
module-attribute
PR_DESCRIPTION_PROMPT = "\nBased on the following commits, generate a comprehensive PR description following this template:\n\n## What type of PR is this? (check all applicable)\n\n- [ ] Refactor\n- [ ] Feature\n- [ ] Bug Fix\n- [ ] Optimization\n- [ ] Documentation Update\n\n## Description\n[Fill this section with a detailed description of the changes]\n\n## Related Tickets & Documents\n- Related Issue #\n- Closes #\n\n## Added/updated tests?\n- [ ] Yes\n- [ ] No, and this is why: [explanation]\n- [ ] I need help with writing tests\n\nConsider the following guidelines:\n- Check the appropriate PR type boxes based on the commit messages\n- Provide a clear, detailed description of the changes\n- Include any relevant issue numbers that this PR relates to or closes\n- Indicate if tests were added, and if not, explain why\n- Use bullet points for clarity\n\nCommits:\n{commit_list}\n\nPR Description:\n---\n\nIMPORTANT:\n- Do not include any other text in your response except the PR description.\n- Do not wrap the PR description in quotes.\n- Do not add any explanations or other text to your response.\n"
PR_TITLE_PROMPT
module-attribute
PR_TITLE_PROMPT = 'Based on the following commits, generate a clear, concise PR title that captures the\nessence of the changes.\nFollow these guidelines:\n- Focus on the most important change\n- If there are multiple related changes, summarize them\n- Keep it under 80 characters\n- Start with a capital letter\n- Don\'t use a period at the end\n- Use present tense (e.g., "Add feature" not "Added feature")\n- Be descriptive and specific (e.g., "Fix memory leak in data processing" not just "Fix bug")\n- Include the type of change if clear (Feature, Fix, Refactor, etc.)\n\nCommits:\n{commit_list}\n\nPR Title:\n---\n\nIMPORTANT:\n- Do not include any other text in your response except the PR title.\n- Do not wrap the PR title in quotes.\n- Do not add any explanations or other text to your response.\n'
format_commits_for_prompt
format_commits_for_prompt(commits: list[str]) -> str
Format commit messages as a bulleted list.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted commit list as a string |
Source code in src/codemap/git/pr_generator/prompts.py
73 74 75 76 77 78 79 80 81 82 83 84 | |
BranchType
module-attribute
BranchType = Literal[
"feature", "release", "hotfix", "bugfix", "docs"
]
PRContent
Bases: TypedDict
Pull request content type.
Source code in src/codemap/git/pr_generator/schemas.py
13 14 15 16 17 | |
title
instance-attribute
title: str
description
instance-attribute
description: str
PullRequest
dataclass
Represents a GitHub Pull Request.
Source code in src/codemap/git/pr_generator/schemas.py
20 21 22 23 24 25 26 27 28 | |
branch
instance-attribute
branch: str
title
instance-attribute
title: str
description
instance-attribute
description: str
url
class-attribute
instance-attribute
url: str | None = None
number
class-attribute
instance-attribute
number: int | None = None
__init__
__init__(
branch: str,
title: str,
description: str,
url: str | None = None,
number: int | None = None,
) -> None
WorkflowStrategySchema
module-attribute
WorkflowStrategySchema = Literal[
"github-flow", "gitflow", "trunk-based"
]
GitFlowStrategy
Bases: WorkflowStrategy
Implementation of GitFlow workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, bugfix) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch |
Source code in src/codemap/git/pr_generator/strategies.py
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for GitFlow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of valid branch types for GitFlow |
Source code in src/codemap/git/pr_generator/strategies.py
340 341 342 343 344 345 346 347 348 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on GitFlow conventions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, bugfix) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
373 374 375 376 377 378 379 380 381 382 383 384 | |
GitHubFlowStrategy
Bases: WorkflowStrategy
Implementation of GitHub Flow workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch (usually 'main') |
Source code in src/codemap/git/pr_generator/strategies.py
246 247 248 249 250 251 252 253 254 255 256 257 258 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix (empty string for GitHub Flow) |
Source code in src/codemap/git/pr_generator/strategies.py
260 261 262 263 264 265 266 267 268 269 270 271 272 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for GitHub Flow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List containing only 'feature' |
Source code in src/codemap/git/pr_generator/strategies.py
274 275 276 277 278 279 280 281 282 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
284 285 286 287 288 289 290 291 292 293 294 295 | |
TrunkBasedStrategy
Bases: WorkflowStrategy
Implementation of Trunk-Based Development workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch (trunk, which is usually 'main') |
Source code in src/codemap/git/pr_generator/strategies.py
390 391 392 393 394 395 396 397 398 399 400 401 402 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
404 405 406 407 408 409 410 411 412 413 414 415 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for Trunk-Based Development.
Returns:
| Type | Description |
|---|---|
list[str]
|
List containing only 'feature' |
Source code in src/codemap/git/pr_generator/strategies.py
417 418 419 420 421 422 423 424 425 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on Trunk-Based Development conventions.
Emphasizes short-lived, descriptive branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
463 464 465 466 467 468 469 470 471 472 473 474 | |
WorkflowStrategy
Bases: ABC
Base class for git workflow strategies.
Source code in src/codemap/git/pr_generator/strategies.py
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | |
get_default_base
abstractmethod
get_default_base(branch_type: str) -> str | None
Get the default base branch for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch |
Source code in src/codemap/git/pr_generator/strategies.py
22 23 24 25 26 27 28 29 30 31 32 33 34 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on the workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | |
get_branch_prefix
abstractmethod
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
54 55 56 57 58 59 60 61 62 63 64 65 66 | |
get_branch_types
abstractmethod
get_branch_types() -> list[str]
Get valid branch types for this workflow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of valid branch types |
Source code in src/codemap/git/pr_generator/strategies.py
68 69 70 71 72 73 74 75 76 77 | |
detect_branch_type
detect_branch_type(branch_name: str | None) -> str | None
Detect the type of a branch from its name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str | None
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Branch type or None if not detected |
Source code in src/codemap/git/pr_generator/strategies.py
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
96 97 98 99 100 101 102 103 104 105 106 107 108 | |
get_remote_branches
get_remote_branches() -> list[str]
Get list of remote branches.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of remote branch names (without 'origin/' prefix) |
Source code in src/codemap/git/pr_generator/strategies.py
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | |
get_local_branches
get_local_branches() -> list[str]
Get list of local branches.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of local branch names |
Source code in src/codemap/git/pr_generator/strategies.py
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | |
get_branches_by_type
get_branches_by_type() -> dict[str, list[str]]
Group branches by their type.
Returns:
| Type | Description |
|---|---|
dict[str, list[str]]
|
Dictionary mapping branch types to lists of branch names |
Source code in src/codemap/git/pr_generator/strategies.py
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | |
get_branch_metadata
get_branch_metadata(branch_name: str) -> dict[str, Any]
Get metadata for a specific branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
dict[str, Any]
|
Dictionary with branch metadata |
Source code in src/codemap/git/pr_generator/strategies.py
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | |
get_all_branches_with_metadata
get_all_branches_with_metadata() -> dict[
str, dict[str, Any]
]
Get all branches with metadata.
Returns:
| Type | Description |
|---|---|
dict[str, dict[str, Any]]
|
Dictionary mapping branch names to metadata dictionaries |
Source code in src/codemap/git/pr_generator/strategies.py
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | |
create_strategy
create_strategy(strategy_name: str) -> WorkflowStrategy
Create a workflow strategy instance based on the strategy name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
strategy_name
|
str
|
The name of the workflow strategy to create. |
required |
Returns:
| Type | Description |
|---|---|
WorkflowStrategy
|
An instance of the requested workflow strategy. |
Raises:
| Type | Description |
|---|---|
ValueError
|
If the strategy name is unknown. |
Source code in src/codemap/git/pr_generator/strategies.py
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 | |
PRCreationError
Bases: GitError
Error raised when there's an issue creating or updating a pull request.
Source code in src/codemap/git/pr_generator/utils.py
24 25 | |
checkout_branch
checkout_branch(branch_name: str) -> None
Checkout an existing branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to checkout |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | |
create_branch
create_branch(branch_name: str) -> None
Create a new branch and switch to it.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to create |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | |
create_pull_request
create_pull_request(
base_branch: str,
head_branch: str,
title: str,
description: str,
) -> PullRequest
Create a pull request on GitHub.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
title
|
str
|
PR title |
required |
description
|
str
|
PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object with PR details |
Raises:
| Type | Description |
|---|---|
PRCreationError
|
If PR creation fails |
Source code in src/codemap/git/pr_generator/utils.py
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 | |
detect_branch_type
detect_branch_type(
branch_name: str, strategy_name: str = "github-flow"
) -> str
Detect the type of a branch based on its name and workflow strategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
strategy_name
|
str
|
Name of the workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
str
|
Branch type or "feature" if not detected |
Source code in src/codemap/git/pr_generator/utils.py
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 | |
generate_pr_content_from_template
generate_pr_content_from_template(
branch_name: str,
description: str,
strategy_name: str = "github-flow",
) -> PRContent
Generate PR title and description using templates from the selected workflow strategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
description
|
str
|
Short description of the changes |
required |
strategy_name
|
str
|
Name of the workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' fields |
Source code in src/codemap/git/pr_generator/utils.py
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 | |
generate_pr_description_from_commits
generate_pr_description_from_commits(
commits: list[str],
) -> str
Generate a PR description from commit messages.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Generated PR description |
Source code in src/codemap/git/pr_generator/utils.py
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | |
generate_pr_description_with_llm
generate_pr_description_with_llm(
commits: list[str],
llm_client: LLMClient | None = None,
model: str | None = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
) -> str
Generate a PR description using an LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
llm_client
|
LLMClient | None
|
LLMClient instance to use (if provided) |
None
|
model
|
str | None
|
LLM model to use (used only if llm_client is None) |
'gpt-4o-mini'
|
api_key
|
str | None
|
API key for LLM provider (used only if llm_client is None) |
None
|
api_base
|
str | None
|
Custom API base URL (used only if llm_client is None) |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Generated PR description |
Source code in src/codemap/git/pr_generator/utils.py
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 | |
generate_pr_title_from_commits
generate_pr_title_from_commits(commits: list[str]) -> str
Generate a PR title from commit messages.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Generated PR title |
Source code in src/codemap/git/pr_generator/utils.py
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | |
generate_pr_title_with_llm
generate_pr_title_with_llm(
commits: list[str],
llm_client: LLMClient | None = None,
model: str | None = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
) -> str
Generate a PR title using an LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
llm_client
|
LLMClient | None
|
LLMClient instance to use (if provided) |
None
|
model
|
str | None
|
LLM model to use (used only if llm_client is None) |
'gpt-4o-mini'
|
api_key
|
str | None
|
API key for LLM provider (used only if llm_client is None) |
None
|
api_base
|
str | None
|
Custom API base URL (used only if llm_client is None) |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Generated PR title |
Source code in src/codemap/git/pr_generator/utils.py
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | |
get_branch_description
get_branch_description(branch_name: str) -> str
Generate a description for a branch based on its commits.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Description of the branch |
Source code in src/codemap/git/pr_generator/utils.py
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 | |
get_branch_relation
get_branch_relation(
branch: str, target_branch: str
) -> tuple[bool, int]
Get the relationship between two branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch
|
str
|
The branch to check |
required |
target_branch
|
str
|
The target branch to compare against |
required |
Returns:
| Type | Description |
|---|---|
tuple[bool, int]
|
Tuple of (is_ancestor, commit_count) |
Source code in src/codemap/git/pr_generator/utils.py
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 | |
get_commit_messages
get_commit_messages(
base_branch: str, head_branch: str
) -> list[str]
Get commit messages between two branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
List of commit messages |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | |
get_current_branch
get_current_branch() -> str
Get the name of the current branch.
Returns:
| Type | Description |
|---|---|
str
|
Name of the current branch |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | |
get_default_branch
get_default_branch() -> str
Get the default branch of the repository.
Returns:
| Type | Description |
|---|---|
str
|
Name of the default branch (usually main or master) |
Source code in src/codemap/git/pr_generator/strategies.py
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 | |
get_existing_pr
get_existing_pr(branch_name: str) -> PullRequest | None
Get an existing PR for a branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Branch name |
required |
Returns:
| Type | Description |
|---|---|
PullRequest | None
|
PullRequest object if found, None otherwise |
Source code in src/codemap/git/pr_generator/utils.py
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 | |
push_branch
push_branch(branch_name: str, force: bool = False) -> None
Push a branch to the remote.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to push |
required |
force
|
bool
|
Whether to force push |
False
|
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | |
suggest_branch_name
suggest_branch_name(message: str, workflow: str) -> str
Suggest a branch name based on a commit message and workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Commit message or description |
required |
workflow
|
str
|
Git workflow strategy to use |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/utils.py
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | |
update_pull_request
update_pull_request(
pr_number: int | None, title: str, description: str
) -> PullRequest
Update an existing pull request.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
pr_number
|
int | None
|
PR number |
required |
title
|
str
|
New PR title |
required |
description
|
str
|
New PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
Updated PullRequest object |
Raises:
| Type | Description |
|---|---|
PRCreationError
|
If PR update fails |
Source code in src/codemap/git/pr_generator/utils.py
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 | |
templates
PR template definitions for different workflow strategies.
DEFAULT_PR_TEMPLATE
module-attribute
DEFAULT_PR_TEMPLATE = {
"title": "{branch_type}: {description}",
"description": "## Description\n\n{description}\n\n## Changes\n\n-\n\n## Related Issues\n\n-\n",
}
GITHUB_FLOW_PR_TEMPLATE
module-attribute
GITHUB_FLOW_PR_TEMPLATE = {
"title": "{description}",
"description": "## Description\n\n{description}\n\n## What does this PR do?\n\n<!-- Please include a summary of the change and which issue is fixed. -->\n\n## Changes\n\n-\n\n## Screenshots (if appropriate)\n\n## Testing completed\n\n- [ ] Unit tests\n- [ ] Integration tests\n- [ ] Manual testing\n\n## Related Issues\n\n<!-- Please link to any related issues here -->\n\n- Closes #\n",
}
TRUNK_BASED_PR_TEMPLATE
module-attribute
TRUNK_BASED_PR_TEMPLATE = {
"title": "{description}",
"description": "## Change Description\n\n{description}\n\n## Implementation\n\n<!-- Briefly describe implementation details -->\n\n-\n\n## Test Plan\n\n<!-- How was this tested? -->\n\n- [ ] Unit tests added/updated\n- [ ] Integration tested\n\n## Rollout Plan\n\n<!-- How should this be deployed? -->\n\n- [ ] Can be deployed immediately\n- [ ] Requires feature flag\n- [ ] Requires data migration\n\n## Related Issues\n\n- Fixes #\n",
}
GITFLOW_PR_TEMPLATES
module-attribute
GITFLOW_PR_TEMPLATES = {
"feature": {
"title": "Feature: {description}",
"description": "## Feature Description\n\n{description}\n\n## Implemented Changes\n\n-\n\n## Testing Performed\n\n- [ ] Unit tests\n- [ ] Integration tests\n- [ ] Manual testing\n\n## Related Issues\n\n- Closes #\n",
},
"release": {
"title": "Release {description}",
"description": "## Release {description}\n\n### Features\n\n-\n\n### Bug Fixes\n\n-\n\n### Breaking Changes\n\n-\n\n## Deployment Notes\n\n-\n\n## Testing Required\n\n- [ ] Smoke tests\n- [ ] Regression tests\n- [ ] Performance tests\n",
},
"hotfix": {
"title": "Hotfix: {description}",
"description": "## Hotfix: {description}\n\n### Issue Description\n\n<!-- Describe the issue being fixed -->\n\n### Fix Implementation\n\n<!-- Describe how the issue was fixed -->\n\n-\n\n### Testing Performed\n\n- [ ] Verified fix locally\n- [ ] Added regression test\n\n### Impact Analysis\n\n- Affected components:\n- Risk assessment:\n",
},
"bugfix": {
"title": "Fix: {description}",
"description": "## Bug Fix\n\n### Issue Description\n\n{description}\n\n### Root Cause\n\n<!-- What caused the bug? -->\n\n### Fix Implementation\n\n-\n\n### Testing Performed\n\n- [ ] Added test case that reproduces the bug\n- [ ] Verified fix locally\n\n### Related Issues\n\n- Fixes #\n",
},
}
schemas
Schemas and data structures for PR generation.
WorkflowStrategySchema
module-attribute
WorkflowStrategySchema = Literal[
"github-flow", "gitflow", "trunk-based"
]
BranchType
module-attribute
BranchType = Literal[
"feature", "release", "hotfix", "bugfix", "docs"
]
PRContent
Bases: TypedDict
Pull request content type.
Source code in src/codemap/git/pr_generator/schemas.py
13 14 15 16 17 | |
title
instance-attribute
title: str
description
instance-attribute
description: str
PullRequest
dataclass
Represents a GitHub Pull Request.
Source code in src/codemap/git/pr_generator/schemas.py
20 21 22 23 24 25 26 27 28 | |
__init__
__init__(
branch: str,
title: str,
description: str,
url: str | None = None,
number: int | None = None,
) -> None
branch
instance-attribute
branch: str
title
instance-attribute
title: str
description
instance-attribute
description: str
url
class-attribute
instance-attribute
url: str | None = None
number
class-attribute
instance-attribute
number: int | None = None
prompts
Prompt templates for PR generation.
PR_TITLE_PROMPT
module-attribute
PR_TITLE_PROMPT = 'Based on the following commits, generate a clear, concise PR title that captures the\nessence of the changes.\nFollow these guidelines:\n- Focus on the most important change\n- If there are multiple related changes, summarize them\n- Keep it under 80 characters\n- Start with a capital letter\n- Don\'t use a period at the end\n- Use present tense (e.g., "Add feature" not "Added feature")\n- Be descriptive and specific (e.g., "Fix memory leak in data processing" not just "Fix bug")\n- Include the type of change if clear (Feature, Fix, Refactor, etc.)\n\nCommits:\n{commit_list}\n\nPR Title:\n---\n\nIMPORTANT:\n- Do not include any other text in your response except the PR title.\n- Do not wrap the PR title in quotes.\n- Do not add any explanations or other text to your response.\n'
PR_DESCRIPTION_PROMPT
module-attribute
PR_DESCRIPTION_PROMPT = "\nBased on the following commits, generate a comprehensive PR description following this template:\n\n## What type of PR is this? (check all applicable)\n\n- [ ] Refactor\n- [ ] Feature\n- [ ] Bug Fix\n- [ ] Optimization\n- [ ] Documentation Update\n\n## Description\n[Fill this section with a detailed description of the changes]\n\n## Related Tickets & Documents\n- Related Issue #\n- Closes #\n\n## Added/updated tests?\n- [ ] Yes\n- [ ] No, and this is why: [explanation]\n- [ ] I need help with writing tests\n\nConsider the following guidelines:\n- Check the appropriate PR type boxes based on the commit messages\n- Provide a clear, detailed description of the changes\n- Include any relevant issue numbers that this PR relates to or closes\n- Indicate if tests were added, and if not, explain why\n- Use bullet points for clarity\n\nCommits:\n{commit_list}\n\nPR Description:\n---\n\nIMPORTANT:\n- Do not include any other text in your response except the PR description.\n- Do not wrap the PR description in quotes.\n- Do not add any explanations or other text to your response.\n"
format_commits_for_prompt
format_commits_for_prompt(commits: list[str]) -> str
Format commit messages as a bulleted list.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted commit list as a string |
Source code in src/codemap/git/pr_generator/prompts.py
73 74 75 76 77 78 79 80 81 82 83 84 | |
utils
Utility functions for PR generation.
logger
module-attribute
logger = getLogger(__name__)
PRCreationError
Bases: GitError
Error raised when there's an issue creating or updating a pull request.
Source code in src/codemap/git/pr_generator/utils.py
24 25 | |
get_current_branch
get_current_branch() -> str
Get the name of the current branch.
Returns:
| Type | Description |
|---|---|
str
|
Name of the current branch |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | |
create_branch
create_branch(branch_name: str) -> None
Create a new branch and switch to it.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to create |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | |
checkout_branch
checkout_branch(branch_name: str) -> None
Checkout an existing branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to checkout |
required |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | |
push_branch
push_branch(branch_name: str, force: bool = False) -> None
Push a branch to the remote.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to push |
required |
force
|
bool
|
Whether to force push |
False
|
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | |
get_commit_messages
get_commit_messages(
base_branch: str, head_branch: str
) -> list[str]
Get commit messages between two branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
List of commit messages |
Raises:
| Type | Description |
|---|---|
GitError
|
If git command fails |
Source code in src/codemap/git/pr_generator/utils.py
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | |
generate_pr_title_from_commits
generate_pr_title_from_commits(commits: list[str]) -> str
Generate a PR title from commit messages.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Generated PR title |
Source code in src/codemap/git/pr_generator/utils.py
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | |
generate_pr_title_with_llm
generate_pr_title_with_llm(
commits: list[str],
llm_client: LLMClient | None = None,
model: str | None = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
) -> str
Generate a PR title using an LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
llm_client
|
LLMClient | None
|
LLMClient instance to use (if provided) |
None
|
model
|
str | None
|
LLM model to use (used only if llm_client is None) |
'gpt-4o-mini'
|
api_key
|
str | None
|
API key for LLM provider (used only if llm_client is None) |
None
|
api_base
|
str | None
|
Custom API base URL (used only if llm_client is None) |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Generated PR title |
Source code in src/codemap/git/pr_generator/utils.py
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | |
generate_pr_description_from_commits
generate_pr_description_from_commits(
commits: list[str],
) -> str
Generate a PR description from commit messages.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
Returns:
| Type | Description |
|---|---|
str
|
Generated PR description |
Source code in src/codemap/git/pr_generator/utils.py
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 | |
generate_pr_description_with_llm
generate_pr_description_with_llm(
commits: list[str],
llm_client: LLMClient | None = None,
model: str | None = "gpt-4o-mini",
api_key: str | None = None,
api_base: str | None = None,
) -> str
Generate a PR description using an LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commits
|
list[str]
|
List of commit messages |
required |
llm_client
|
LLMClient | None
|
LLMClient instance to use (if provided) |
None
|
model
|
str | None
|
LLM model to use (used only if llm_client is None) |
'gpt-4o-mini'
|
api_key
|
str | None
|
API key for LLM provider (used only if llm_client is None) |
None
|
api_base
|
str | None
|
Custom API base URL (used only if llm_client is None) |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Generated PR description |
Source code in src/codemap/git/pr_generator/utils.py
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 | |
create_pull_request
create_pull_request(
base_branch: str,
head_branch: str,
title: str,
description: str,
) -> PullRequest
Create a pull request on GitHub.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
title
|
str
|
PR title |
required |
description
|
str
|
PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object with PR details |
Raises:
| Type | Description |
|---|---|
PRCreationError
|
If PR creation fails |
Source code in src/codemap/git/pr_generator/utils.py
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 | |
update_pull_request
update_pull_request(
pr_number: int | None, title: str, description: str
) -> PullRequest
Update an existing pull request.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
pr_number
|
int | None
|
PR number |
required |
title
|
str
|
New PR title |
required |
description
|
str
|
New PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
Updated PullRequest object |
Raises:
| Type | Description |
|---|---|
PRCreationError
|
If PR update fails |
Source code in src/codemap/git/pr_generator/utils.py
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 | |
get_existing_pr
get_existing_pr(branch_name: str) -> PullRequest | None
Get an existing PR for a branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Branch name |
required |
Returns:
| Type | Description |
|---|---|
PullRequest | None
|
PullRequest object if found, None otherwise |
Source code in src/codemap/git/pr_generator/utils.py
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 | |
generate_pr_content_from_template
generate_pr_content_from_template(
branch_name: str,
description: str,
strategy_name: str = "github-flow",
) -> PRContent
Generate PR title and description using templates from the selected workflow strategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
description
|
str
|
Short description of the changes |
required |
strategy_name
|
str
|
Name of the workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' fields |
Source code in src/codemap/git/pr_generator/utils.py
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 | |
get_timestamp
get_timestamp() -> str
Get a timestamp string for branch names.
Returns:
| Type | Description |
|---|---|
str
|
Timestamp string in YYYYMMDD-HHMMSS format |
Source code in src/codemap/git/pr_generator/utils.py
612 613 614 615 616 617 618 619 620 621 | |
suggest_branch_name
suggest_branch_name(message: str, workflow: str) -> str
Suggest a branch name based on a commit message and workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
Commit message or description |
required |
workflow
|
str
|
Git workflow strategy to use |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/utils.py
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | |
get_branch_relation
get_branch_relation(
branch: str, target_branch: str
) -> tuple[bool, int]
Get the relationship between two branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch
|
str
|
The branch to check |
required |
target_branch
|
str
|
The target branch to compare against |
required |
Returns:
| Type | Description |
|---|---|
tuple[bool, int]
|
Tuple of (is_ancestor, commit_count) |
Source code in src/codemap/git/pr_generator/utils.py
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 | |
get_branch_description
get_branch_description(branch_name: str) -> str
Generate a description for a branch based on its commits.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Description of the branch |
Source code in src/codemap/git/pr_generator/utils.py
754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 | |
detect_branch_type
detect_branch_type(
branch_name: str, strategy_name: str = "github-flow"
) -> str
Detect the type of a branch based on its name and workflow strategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
strategy_name
|
str
|
Name of the workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
str
|
Branch type or "feature" if not detected |
Source code in src/codemap/git/pr_generator/utils.py
785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 | |
list_branches
list_branches() -> list[str]
Get a list of all branches (local and remote).
Returns:
| Type | Description |
|---|---|
list[str]
|
List of branch names |
Source code in src/codemap/git/pr_generator/utils.py
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 | |
validate_branch_name
validate_branch_name(branch_name: str | None) -> bool
Validate a branch name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str | None
|
Branch name to validate |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if valid, False otherwise |
Source code in src/codemap/git/pr_generator/utils.py
845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 | |
decorators
Decorators for the PR generator module.
logger
module-attribute
logger = getLogger(__name__)
F
module-attribute
F = TypeVar('F', bound=Callable[..., object])
git_operation
Decorator for git operations.
This decorator wraps functions that perform git operations, providing:
- Logging of operation start/end
- Standardized error handling
- Automatic conversion of git-related exceptions to GitError
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
func
|
F
|
The function to decorate |
required |
Returns:
| Type | Description |
|---|---|
F
|
Decorated function |
Source code in src/codemap/git/pr_generator/decorators.py
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | |
constants
Constants for PR generation.
MAX_COMMIT_PREVIEW
module-attribute
MAX_COMMIT_PREVIEW = 3
MIN_SIGNIFICANT_WORD_LENGTH
module-attribute
MIN_SIGNIFICANT_WORD_LENGTH = 3
MIN_COMMIT_PARTS
module-attribute
MIN_COMMIT_PARTS = 3
strategies
Git workflow strategy implementations for PR management.
WorkflowStrategy
Bases: ABC
Base class for git workflow strategies.
Source code in src/codemap/git/pr_generator/strategies.py
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | |
get_default_base
abstractmethod
get_default_base(branch_type: str) -> str | None
Get the default base branch for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch |
Source code in src/codemap/git/pr_generator/strategies.py
22 23 24 25 26 27 28 29 30 31 32 33 34 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on the workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | |
get_branch_prefix
abstractmethod
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
54 55 56 57 58 59 60 61 62 63 64 65 66 | |
get_branch_types
abstractmethod
get_branch_types() -> list[str]
Get valid branch types for this workflow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of valid branch types |
Source code in src/codemap/git/pr_generator/strategies.py
68 69 70 71 72 73 74 75 76 77 | |
detect_branch_type
detect_branch_type(branch_name: str | None) -> str | None
Detect the type of a branch from its name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str | None
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Branch type or None if not detected |
Source code in src/codemap/git/pr_generator/strategies.py
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for a given branch type.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
96 97 98 99 100 101 102 103 104 105 106 107 108 | |
get_remote_branches
get_remote_branches() -> list[str]
Get list of remote branches.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of remote branch names (without 'origin/' prefix) |
Source code in src/codemap/git/pr_generator/strategies.py
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | |
get_local_branches
get_local_branches() -> list[str]
Get list of local branches.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of local branch names |
Source code in src/codemap/git/pr_generator/strategies.py
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | |
get_branches_by_type
get_branches_by_type() -> dict[str, list[str]]
Group branches by their type.
Returns:
| Type | Description |
|---|---|
dict[str, list[str]]
|
Dictionary mapping branch types to lists of branch names |
Source code in src/codemap/git/pr_generator/strategies.py
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | |
get_branch_metadata
get_branch_metadata(branch_name: str) -> dict[str, Any]
Get metadata for a specific branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
Returns:
| Type | Description |
|---|---|
dict[str, Any]
|
Dictionary with branch metadata |
Source code in src/codemap/git/pr_generator/strategies.py
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 | |
get_all_branches_with_metadata
get_all_branches_with_metadata() -> dict[
str, dict[str, Any]
]
Get all branches with metadata.
Returns:
| Type | Description |
|---|---|
dict[str, dict[str, Any]]
|
Dictionary mapping branch names to metadata dictionaries |
Source code in src/codemap/git/pr_generator/strategies.py
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 | |
GitHubFlowStrategy
Bases: WorkflowStrategy
Implementation of GitHub Flow workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch (usually 'main') |
Source code in src/codemap/git/pr_generator/strategies.py
246 247 248 249 250 251 252 253 254 255 256 257 258 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix (empty string for GitHub Flow) |
Source code in src/codemap/git/pr_generator/strategies.py
260 261 262 263 264 265 266 267 268 269 270 271 272 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for GitHub Flow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List containing only 'feature' |
Source code in src/codemap/git/pr_generator/strategies.py
274 275 276 277 278 279 280 281 282 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for GitHub Flow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (always 'feature' in GitHub Flow) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
284 285 286 287 288 289 290 291 292 293 294 295 | |
GitFlowStrategy
Bases: WorkflowStrategy
Implementation of GitFlow workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, bugfix) |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch |
Source code in src/codemap/git/pr_generator/strategies.py
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for GitFlow.
Returns:
| Type | Description |
|---|---|
list[str]
|
List of valid branch types for GitFlow |
Source code in src/codemap/git/pr_generator/strategies.py
340 341 342 343 344 345 346 347 348 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on GitFlow conventions.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, etc.) |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for GitFlow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch (feature, release, hotfix, bugfix) |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
373 374 375 376 377 378 379 380 381 382 383 384 | |
TrunkBasedStrategy
Bases: WorkflowStrategy
Implementation of Trunk-Based Development workflow strategy.
Source code in src/codemap/git/pr_generator/strategies.py
387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 | |
get_default_base
get_default_base(branch_type: str) -> str | None
Get the default base branch for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
str | None
|
Name of the default base branch (trunk, which is usually 'main') |
Source code in src/codemap/git/pr_generator/strategies.py
390 391 392 393 394 395 396 397 398 399 400 401 402 | |
get_branch_prefix
get_branch_prefix(branch_type: str) -> str
Get the branch name prefix for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Branch name prefix |
Source code in src/codemap/git/pr_generator/strategies.py
404 405 406 407 408 409 410 411 412 413 414 415 | |
get_branch_types
get_branch_types() -> list[str]
Get valid branch types for Trunk-Based Development.
Returns:
| Type | Description |
|---|---|
list[str]
|
List containing only 'feature' |
Source code in src/codemap/git/pr_generator/strategies.py
417 418 419 420 421 422 423 424 425 | |
suggest_branch_name
suggest_branch_name(
branch_type: str, description: str
) -> str
Suggest a branch name based on Trunk-Based Development conventions.
Emphasizes short-lived, descriptive branches.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
description
|
str
|
Description of the branch |
required |
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/strategies.py
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 | |
get_pr_templates
get_pr_templates(branch_type: str) -> dict[str, str]
Get PR title and description templates for Trunk-Based Development.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_type
|
str
|
Type of branch |
required |
Returns:
| Type | Description |
|---|---|
dict[str, str]
|
Dictionary with 'title' and 'description' templates |
Source code in src/codemap/git/pr_generator/strategies.py
463 464 465 466 467 468 469 470 471 472 473 474 | |
get_strategy_class
get_strategy_class(
strategy_name: str,
) -> type[WorkflowStrategy] | None
Get the workflow strategy class corresponding to the strategy name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
strategy_name
|
str
|
Name of the workflow strategy |
required |
Returns:
| Type | Description |
|---|---|
type[WorkflowStrategy] | None
|
Workflow strategy class or None if not found |
Source code in src/codemap/git/pr_generator/strategies.py
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 | |
create_strategy
create_strategy(strategy_name: str) -> WorkflowStrategy
Create a workflow strategy instance based on the strategy name.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
strategy_name
|
str
|
The name of the workflow strategy to create. |
required |
Returns:
| Type | Description |
|---|---|
WorkflowStrategy
|
An instance of the requested workflow strategy. |
Raises:
| Type | Description |
|---|---|
ValueError
|
If the strategy name is unknown. |
Source code in src/codemap/git/pr_generator/strategies.py
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 | |
branch_exists
branch_exists(
branch_name: str, include_remote: bool = True
) -> bool
Check if a branch exists.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch to check |
required |
include_remote
|
bool
|
Whether to check remote branches as well |
True
|
Returns:
| Type | Description |
|---|---|
bool
|
True if the branch exists, False otherwise |
Source code in src/codemap/git/pr_generator/strategies.py
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 | |
get_default_branch
get_default_branch() -> str
Get the default branch of the repository.
Returns:
| Type | Description |
|---|---|
str
|
Name of the default branch (usually main or master) |
Source code in src/codemap/git/pr_generator/strategies.py
560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 | |
command
Main PR generation command implementation for CodeMap.
logger
module-attribute
logger = getLogger(__name__)
PRCommand
Handles the PR generation command workflow.
Source code in src/codemap/git/pr_generator/command.py
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | |
__init__
__init__(
path: Path | None = None, model: str = "gpt-4o-mini"
) -> None
Initialize the PR command.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path
|
Path | None
|
Optional path to start from |
None
|
model
|
str
|
LLM model to use for PR description generation |
'gpt-4o-mini'
|
Source code in src/codemap/git/pr_generator/command.py
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | |
pr_generator
instance-attribute
pr_generator = PRGenerator(
repo_path=repo_root, llm_client=llm_client
)
error_state
instance-attribute
error_state = None
run
run() -> dict[str, Any]
Run the PR generation command.
Returns:
| Type | Description |
|---|---|
dict[str, Any]
|
Dictionary with PR information and generated description |
Raises:
| Type | Description |
|---|---|
RuntimeError
|
If the command fails |
Source code in src/codemap/git/pr_generator/command.py
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | |
PRWorkflowCommand
Handles the core PR creation and update workflow logic.
Source code in src/codemap/git/pr_generator/command.py
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 | |
__init__
__init__(
repo_path: Path,
config_loader: ConfigLoader,
llm_client: LLMClient | None = None,
model: str | None = None,
api_key: str | None = None,
api_base: str | None = None,
) -> None
Initialize the PR workflow command helper.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_path
|
Path
|
Path to the repository. |
required |
config_loader
|
ConfigLoader
|
ConfigLoader instance. |
required |
llm_client
|
LLMClient | None
|
Optional pre-configured LLMClient. |
None
|
model
|
str | None
|
LLM model name (used if llm_client is None). |
None
|
api_key
|
str | None
|
API key (used if llm_client is None). |
None
|
api_base
|
str | None
|
API base URL (used if llm_client is None). |
None
|
Source code in src/codemap/git/pr_generator/command.py
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | |
repo_path
instance-attribute
repo_path = repo_path
config_loader
instance-attribute
config_loader = config_loader
pr_config
instance-attribute
pr_config = get('pr', {})
content_config
instance-attribute
content_config = get('content', {})
workflow_strategy_name
instance-attribute
workflow_strategy_name = get_workflow_strategy()
llm_client
instance-attribute
llm_client = llm_client
pr_generator
instance-attribute
pr_generator = PRGenerator(
repo_path=repo_path, llm_client=llm_client
)
create_pr_workflow
create_pr_workflow(
base_branch: str,
head_branch: str,
title: str | None = None,
description: str | None = None,
) -> PullRequest
Orchestrates the PR creation process (non-interactive part).
Source code in src/codemap/git/pr_generator/command.py
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 | |
update_pr_workflow
update_pr_workflow(
pr_number: int,
title: str | None = None,
description: str | None = None,
base_branch: str | None = None,
head_branch: str | None = None,
) -> PullRequest
Orchestrates the PR update process (non-interactive part).
Source code in src/codemap/git/pr_generator/command.py
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 | |
generator
PR generator for the CodeMap Git module.
This module provides the PRGenerator class, which generates pull requests for git repositories.
logger
module-attribute
logger = getLogger(__name__)
PRGenerator
Generator for Pull Requests.
This class handles generating pull request content (title and description) and creating/updating PRs on GitHub.
Source code in src/codemap/git/pr_generator/generator.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | |
__init__
__init__(repo_path: Path, llm_client: LLMClient) -> None
Initialize the PR generator.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_path
|
Path
|
Path to the git repository |
required |
llm_client
|
LLMClient
|
LLMClient instance to use for content generation |
required |
Source code in src/codemap/git/pr_generator/generator.py
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | |
repo_path
instance-attribute
repo_path = repo_path
client
instance-attribute
client = llm_client
generate_content_from_commits
generate_content_from_commits(
base_branch: str, head_branch: str, use_llm: bool = True
) -> PRContent
Generate PR content (title and description) from commits.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
use_llm
|
bool
|
Whether to use LLM for generation |
True
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' keys |
Source code in src/codemap/git/pr_generator/generator.py
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | |
generate_content_from_template
generate_content_from_template(
branch_name: str,
description: str,
workflow_strategy: str = "github-flow",
) -> PRContent
Generate PR content (title and description) from a template.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Name of the branch |
required |
description
|
str
|
Short description of the changes |
required |
workflow_strategy
|
str
|
Git workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
PRContent
|
Dictionary with 'title' and 'description' keys |
Source code in src/codemap/git/pr_generator/generator.py
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 | |
suggest_branch_name
suggest_branch_name(
description: str, workflow_strategy: str = "github-flow"
) -> str
Suggest a branch name based on a description.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
description
|
str
|
Description of the branch |
required |
workflow_strategy
|
str
|
Git workflow strategy to use |
'github-flow'
|
Returns:
| Type | Description |
|---|---|
str
|
Suggested branch name |
Source code in src/codemap/git/pr_generator/generator.py
108 109 110 111 112 113 114 115 116 117 118 119 120 | |
create_pr
create_pr(
base_branch: str,
head_branch: str,
title: str,
description: str,
) -> PullRequest
Create a pull request on GitHub.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str
|
Base branch (e.g., main) |
required |
head_branch
|
str
|
Head branch (e.g., feature-branch) |
required |
title
|
str
|
PR title |
required |
description
|
str
|
PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object with PR details |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR creation fails |
Source code in src/codemap/git/pr_generator/generator.py
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | |
update_pr
update_pr(
pr_number: int, title: str, description: str
) -> PullRequest
Update an existing pull request.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
pr_number
|
int
|
PR number |
required |
title
|
str
|
New PR title |
required |
description
|
str
|
New PR description |
required |
Returns:
| Type | Description |
|---|---|
PullRequest
|
Updated PullRequest object |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR update fails |
Source code in src/codemap/git/pr_generator/generator.py
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | |
get_existing_pr
get_existing_pr(branch_name: str) -> PullRequest | None
Get an existing PR for a branch.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
branch_name
|
str
|
Branch name |
required |
Returns:
| Type | Description |
|---|---|
PullRequest | None
|
PullRequest object if found, None otherwise |
Source code in src/codemap/git/pr_generator/generator.py
159 160 161 162 163 164 165 166 167 168 169 170 | |
create_or_update_pr
create_or_update_pr(
base_branch: str | None = None,
head_branch: str | None = None,
title: str | None = None,
description: str | None = None,
use_llm: bool = True,
pr_number: int | None = None,
) -> PullRequest
Create a new PR or update an existing one.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
base_branch
|
str | None
|
Base branch (defaults to default branch) |
None
|
head_branch
|
str | None
|
Head branch |
None
|
title
|
str | None
|
PR title (if None, will be generated) |
None
|
description
|
str | None
|
PR description (if None, will be generated) |
None
|
use_llm
|
bool
|
Whether to use LLM for content generation |
True
|
pr_number
|
int | None
|
PR number for update (if None, will create new PR) |
None
|
Returns:
| Type | Description |
|---|---|
PullRequest
|
PullRequest object |
Raises:
| Type | Description |
|---|---|
GitError
|
If PR creation/update fails |
Source code in src/codemap/git/pr_generator/generator.py
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | |
diff_splitter
Diff splitting package for CodeMap.
This package provides utilities for splitting Git diffs into logical chunks.
MIN_NAME_LENGTH_FOR_SIMILARITY
module-attribute
MIN_NAME_LENGTH_FOR_SIMILARITY: Final = 3
DiffChunk
dataclass
Represents a logical chunk of changes.
Source code in src/codemap/git/diff_splitter/schemas.py
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
__post_init__
__post_init__() -> None
Initialize default values.
Source code in src/codemap/git/diff_splitter/schemas.py
17 18 19 20 | |
__hash__
__hash__() -> int
Make DiffChunk hashable by using the object's id.
Returns:
| Type | Description |
|---|---|
int
|
Hash value based on the object's id |
Source code in src/codemap/git/diff_splitter/schemas.py
22 23 24 25 26 27 28 29 30 | |
__eq__
__eq__(other: object) -> bool
Compare DiffChunk objects for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
object
|
Another object to compare with |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the objects are the same instance, False otherwise |
Source code in src/codemap/git/diff_splitter/schemas.py
32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
DiffChunkData
dataclass
Dictionary-based representation of a DiffChunk for serialization.
Source code in src/codemap/git/diff_splitter/schemas.py
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | |
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
from_chunk
classmethod
from_chunk(chunk: DiffChunk) -> DiffChunkData
Create a DiffChunkData from a DiffChunk.
Source code in src/codemap/git/diff_splitter/schemas.py
58 59 60 61 62 63 64 65 66 67 | |
to_chunk
to_chunk() -> DiffChunk
Convert DiffChunkData to a DiffChunk.
Source code in src/codemap/git/diff_splitter/schemas.py
69 70 71 72 73 74 75 76 77 | |
to_dict
to_dict() -> dict[str, Any]
Convert to a dictionary.
Source code in src/codemap/git/diff_splitter/schemas.py
79 80 81 82 83 84 85 86 87 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
DiffSplitter
Splits Git diffs into logical chunks.
Source code in src/codemap/git/diff_splitter/splitter.py
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
__init__
__init__(
repo_root: Path,
similarity_threshold: float = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["similarity_threshold"],
directory_similarity_threshold: float = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["directory_similarity_threshold"],
min_chunks_for_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["min_chunks_for_consolidation"],
max_chunks_before_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["max_chunks_before_consolidation"],
max_file_size_for_llm: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_file_size_for_llm"],
max_log_diff_size: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_log_diff_size"],
model_name: str = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["model_name"],
) -> None
Initialize the diff splitter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_root
|
Path
|
Root directory of the Git repository |
required |
similarity_threshold
|
float
|
Threshold for grouping by content similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['similarity_threshold']
|
directory_similarity_threshold
|
float
|
Threshold for directory similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['directory_similarity_threshold']
|
min_chunks_for_consolidation
|
int
|
Min chunks to trigger consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['min_chunks_for_consolidation']
|
max_chunks_before_consolidation
|
int
|
Max chunks allowed before forced consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_chunks_before_consolidation']
|
max_file_size_for_llm
|
int
|
Max file size (bytes) to process for LLM context.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_file_size_for_llm']
|
max_log_diff_size
|
int
|
Max diff size (bytes) to log in debug mode.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_log_diff_size']
|
model_name
|
str
|
Name of the sentence-transformer model to use.
Defaults to the value from DEFAULT_CONFIG. |
DEFAULT_CONFIG['commit']['diff_splitter']['model_name']
|
Source code in src/codemap/git/diff_splitter/splitter.py
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | |
repo_root
instance-attribute
repo_root = repo_root
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
directory_similarity_threshold
instance-attribute
directory_similarity_threshold = (
directory_similarity_threshold
)
min_chunks_for_consolidation
instance-attribute
min_chunks_for_consolidation = min_chunks_for_consolidation
max_chunks_before_consolidation
instance-attribute
max_chunks_before_consolidation = (
max_chunks_before_consolidation
)
max_file_size_for_llm
instance-attribute
max_file_size_for_llm = max_file_size_for_llm
max_log_diff_size
instance-attribute
max_log_diff_size = max_log_diff_size
model_name
instance-attribute
model_name = model_name
are_sentence_transformers_available
classmethod
are_sentence_transformers_available() -> bool
Check if sentence transformers are available.
Returns:
| Type | Description |
|---|---|
bool
|
True if sentence transformers are available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
120 121 122 123 124 125 126 127 128 129 | |
is_model_available
classmethod
is_model_available() -> bool
Check if embedding model is available.
Returns:
| Type | Description |
|---|---|
bool
|
True if embedding model is available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
131 132 133 134 135 136 137 138 139 140 | |
set_model_available
classmethod
set_model_available(value: bool) -> None
Set model availability flag.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value
|
bool
|
Boolean indicating if model is available |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
142 143 144 145 146 147 148 149 150 151 | |
get_embedding_model
classmethod
get_embedding_model() -> EmbeddingModel | None
Get the embedding model.
Returns:
| Type | Description |
|---|---|
EmbeddingModel | None
|
The embedding model or None if not available |
Source code in src/codemap/git/diff_splitter/splitter.py
153 154 155 156 157 158 159 160 161 162 | |
set_embedding_model
classmethod
set_embedding_model(model: EmbeddingModel) -> None
Set the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
model
|
EmbeddingModel
|
The embedding model to set |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
164 165 166 167 168 169 170 171 172 173 | |
split_diff
Split a diff into logical chunks using semantic splitting.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
tuple[list[DiffChunk], list[str]]
|
Tuple of (List of DiffChunk objects based on semantic analysis, List of filtered large files) |
Raises:
| Type | Description |
|---|---|
ValueError
|
If semantic splitting is not available or fails |
Source code in src/codemap/git/diff_splitter/splitter.py
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | |
encode_chunks
encode_chunks(chunks: list[str]) -> dict[str, ndarray]
Encode a list of text chunks using the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[str]
|
List of text chunks to encode |
required |
Returns:
| Type | Description |
|---|---|
dict[str, ndarray]
|
Dictionary with embeddings array |
Source code in src/codemap/git/diff_splitter/splitter.py
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
BaseSplitStrategy
Base class for diff splitting strategies.
Source code in src/codemap/git/diff_splitter/strategies.py
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | |
__init__
__init__(
embedding_model: EmbeddingModel | None = None,
) -> None
Initialize with optional embedding model.
Source code in src/codemap/git/diff_splitter/strategies.py
48 49 50 51 52 53 | |
split
Split the diff into chunks.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects |
Source code in src/codemap/git/diff_splitter/strategies.py
55 56 57 58 59 60 61 62 63 64 65 66 67 | |
FileSplitStrategy
Bases: BaseSplitStrategy
Strategy to split diffs by file.
Source code in src/codemap/git/diff_splitter/strategies.py
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | |
split
Split a diff into chunks by file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects, one per file |
Source code in src/codemap/git/diff_splitter/strategies.py
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | |
SemanticSplitStrategy
Bases: BaseSplitStrategy
Strategy to split diffs semantically.
Source code in src/codemap/git/diff_splitter/strategies.py
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 
1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 | |
__init__
__init__(
embedding_model: EmbeddingModel | None = None,
code_extensions: set[str] | None = None,
related_file_patterns: list[tuple[Pattern, Pattern]]
| None = None,
similarity_threshold: float = 0.4,
directory_similarity_threshold: float = 0.3,
min_chunks_for_consolidation: int = 2,
max_chunks_before_consolidation: int = 20,
max_file_size_for_llm: int | None = None,
) -> None
Initialize the SemanticSplitStrategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
embedding_model
|
EmbeddingModel | None
|
Optional embedding model instance |
None
|
code_extensions
|
set[str] | None
|
Optional set of code file extensions. Defaults to config. |
None
|
related_file_patterns
|
list[tuple[Pattern, Pattern]] | None
|
Optional list of related file patterns |
None
|
similarity_threshold
|
float
|
Threshold for grouping by content similarity. |
0.4
|
directory_similarity_threshold
|
float
|
Threshold for directory similarity. |
0.3
|
min_chunks_for_consolidation
|
int
|
Min chunks to trigger consolidation. |
2
|
max_chunks_before_consolidation
|
int
|
Max chunks allowed before forced consolidation. |
20
|
max_file_size_for_llm
|
int | None
|
Max file size for LLM processing. |
None
|
Source code in src/codemap/git/diff_splitter/strategies.py
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | |
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
directory_similarity_threshold
instance-attribute
directory_similarity_threshold = (
directory_similarity_threshold
)
min_chunks_for_consolidation
instance-attribute
min_chunks_for_consolidation = min_chunks_for_consolidation
max_chunks_before_consolidation
instance-attribute
max_chunks_before_consolidation = (
max_chunks_before_consolidation
)
max_file_size_for_llm
instance-attribute
max_file_size_for_llm = (
max_file_size_for_llm
if max_file_size_for_llm is not None
else DEFAULT_CONFIG["commit"]["diff_splitter"][
"max_file_size_for_llm"
]
)
code_extensions
instance-attribute
code_extensions = (
code_extensions
if code_extensions is not None
else set(
DEFAULT_CONFIG["commit"]["diff_splitter"][
"default_code_extensions"
]
)
)
related_file_patterns
instance-attribute
related_file_patterns = (
related_file_patterns
or _initialize_related_file_patterns()
)
split
Split a diff into chunks based on semantic relationships.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects based on semantic analysis |
Source code in src/codemap/git/diff_splitter/strategies.py
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | |
calculate_semantic_similarity
calculate_semantic_similarity(
emb1: list[float], emb2: list[float]
) -> float
Calculate semantic similarity (cosine similarity) between two embedding vectors.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
emb1
|
list[float]
|
First embedding vector |
required |
emb2
|
list[float]
|
Second embedding vector |
required |
Returns:
| Type | Description |
|---|---|
float
|
Similarity score between 0 and 1 |
Source code in src/codemap/git/diff_splitter/utils.py
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | |
create_chunk_description
create_chunk_description(
commit_type: str, files: list[str]
) -> str
Create a meaningful description for a chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commit_type
|
str
|
Type of commit (e.g., "feat", "fix") |
required |
files
|
list[str]
|
List of file paths |
required |
Returns:
| Type | Description |
|---|---|
str
|
Description string |
Source code in src/codemap/git/diff_splitter/utils.py
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | |
determine_commit_type
determine_commit_type(files: list[str]) -> str
Determine the appropriate commit type based on the files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of file paths |
required |
Returns:
| Type | Description |
|---|---|
str
|
Commit type string (e.g., "feat", "fix", "test", "docs", "chore") |
Source code in src/codemap/git/diff_splitter/utils.py
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | |
filter_valid_files
filter_valid_files(
files: list[str], is_test_environment: bool = False
) -> tuple[list[str], list[str]]
Filter invalid filenames and files based on existence and Git tracking.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of file paths to filter |
required |
is_test_environment
|
bool
|
Whether running in a test environment |
False
|
Returns:
| Type | Description |
|---|---|
tuple[list[str], list[str]]
|
Tuple of (valid_files, empty_list) - The second element is always an empty list now. |
Source code in src/codemap/git/diff_splitter/utils.py
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | |
get_language_specific_patterns
get_language_specific_patterns(language: str) -> list[str]
Get language-specific regex patterns for code structure.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
language
|
str
|
Programming language identifier |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
A list of regex patterns for the language, or empty list if not supported |
Source code in src/codemap/git/diff_splitter/utils.py
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | |
is_test_environment
is_test_environment() -> bool
Check if the code is running in a test environment.
Returns:
| Type | Description |
|---|---|
bool
|
True if in a test environment, False otherwise |
Source code in src/codemap/git/diff_splitter/utils.py
334 335 336 337 338 339 340 341 342 343 | |
schemas
Schema definitions for diff splitting.
DiffChunk
dataclass
Represents a logical chunk of changes.
Source code in src/codemap/git/diff_splitter/schemas.py
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
__post_init__
__post_init__() -> None
Initialize default values.
Source code in src/codemap/git/diff_splitter/schemas.py
17 18 19 20 | |
__hash__
__hash__() -> int
Make DiffChunk hashable by using the object's id.
Returns:
| Type | Description |
|---|---|
int
|
Hash value based on the object's id |
Source code in src/codemap/git/diff_splitter/schemas.py
22 23 24 25 26 27 28 29 30 | |
__eq__
__eq__(other: object) -> bool
Compare DiffChunk objects for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
object
|
Another object to compare with |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the objects are the same instance, False otherwise |
Source code in src/codemap/git/diff_splitter/schemas.py
32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
DiffChunkData
dataclass
Dictionary-based representation of a DiffChunk for serialization.
Source code in src/codemap/git/diff_splitter/schemas.py
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
from_chunk
classmethod
from_chunk(chunk: DiffChunk) -> DiffChunkData
Create a DiffChunkData from a DiffChunk.
Source code in src/codemap/git/diff_splitter/schemas.py
58 59 60 61 62 63 64 65 66 67 | |
to_chunk
to_chunk() -> DiffChunk
Convert DiffChunkData to a DiffChunk.
Source code in src/codemap/git/diff_splitter/schemas.py
69 70 71 72 73 74 75 76 77 | |
to_dict
to_dict() -> dict[str, Any]
Convert to a dictionary.
Source code in src/codemap/git/diff_splitter/schemas.py
79 80 81 82 83 84 85 86 87 | |
utils
Utility functions for diff splitting.
get_language_specific_patterns
get_language_specific_patterns(language: str) -> list[str]
Get language-specific regex patterns for code structure.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
language
|
str
|
Programming language identifier |
required |
Returns:
| Type | Description |
|---|---|
list[str]
|
A list of regex patterns for the language, or empty list if not supported |
Source code in src/codemap/git/diff_splitter/utils.py
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | |
determine_commit_type
determine_commit_type(files: list[str]) -> str
Determine the appropriate commit type based on the files.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of file paths |
required |
Returns:
| Type | Description |
|---|---|
str
|
Commit type string (e.g., "feat", "fix", "test", "docs", "chore") |
Source code in src/codemap/git/diff_splitter/utils.py
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | |
create_chunk_description
create_chunk_description(
commit_type: str, files: list[str]
) -> str
Create a meaningful description for a chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
commit_type
|
str
|
Type of commit (e.g., "feat", "fix") |
required |
files
|
list[str]
|
List of file paths |
required |
Returns:
| Type | Description |
|---|---|
str
|
Description string |
Source code in src/codemap/git/diff_splitter/utils.py
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | |
get_deleted_tracked_files
get_deleted_tracked_files() -> tuple[set, set]
Get list of deleted but tracked files from git status.
Returns:
| Type | Description |
|---|---|
tuple[set, set]
|
Tuple of (deleted_unstaged_files, deleted_staged_files) as sets |
Source code in src/codemap/git/diff_splitter/utils.py
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | |
filter_valid_files
filter_valid_files(
files: list[str], is_test_environment: bool = False
) -> tuple[list[str], list[str]]
Filter invalid filenames and files based on existence and Git tracking.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of file paths to filter |
required |
is_test_environment
|
bool
|
Whether running in a test environment |
False
|
Returns:
| Type | Description |
|---|---|
tuple[list[str], list[str]]
|
Tuple of (valid_files, empty_list) - The second element is always an empty list now. |
Source code in src/codemap/git/diff_splitter/utils.py
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | |
is_test_environment
is_test_environment() -> bool
Check if the code is running in a test environment.
Returns:
| Type | Description |
|---|---|
bool
|
True if in a test environment, False otherwise |
Source code in src/codemap/git/diff_splitter/utils.py
334 335 336 337 338 339 340 341 342 343 | |
calculate_semantic_similarity
calculate_semantic_similarity(
emb1: list[float], emb2: list[float]
) -> float
Calculate semantic similarity (cosine similarity) between two embedding vectors.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
emb1
|
list[float]
|
First embedding vector |
required |
emb2
|
list[float]
|
Second embedding vector |
required |
Returns:
| Type | Description |
|---|---|
float
|
Similarity score between 0 and 1 |
Source code in src/codemap/git/diff_splitter/utils.py
346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | |
match_test_file_patterns
match_test_file_patterns(file1: str, file2: str) -> bool
Check if files match common test file patterns.
Source code in src/codemap/git/diff_splitter/utils.py
387 388 389 390 391 392 393 394 395 396 397 398 | |
have_similar_names
have_similar_names(file1: str, file2: str) -> bool
Check if files have similar base names.
Source code in src/codemap/git/diff_splitter/utils.py
401 402 403 404 405 406 | |
has_related_file_pattern
has_related_file_pattern(
file1: str,
file2: str,
related_file_patterns: Iterable[
tuple[Pattern, Pattern]
],
) -> bool
Check if files match known related patterns.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
file1
|
str
|
First file path |
required |
file2
|
str
|
Second file path |
required |
related_file_patterns
|
Iterable[tuple[Pattern, Pattern]]
|
Compiled regex pattern pairs to check against |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the files match a known pattern, False otherwise |
Source code in src/codemap/git/diff_splitter/utils.py
409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 | |
are_files_related
are_files_related(
file1: str,
file2: str,
related_file_patterns: Iterable[
tuple[Pattern, Pattern]
],
) -> bool
Determine if two files are semantically related based on various criteria.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
file1
|
str
|
First file path |
required |
file2
|
str
|
Second file path |
required |
related_file_patterns
|
Iterable[tuple[Pattern, Pattern]]
|
Compiled regex pattern pairs for pattern matching |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the files are related, False otherwise |
Source code in src/codemap/git/diff_splitter/utils.py
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 | |
splitter
Diff splitting implementation for CodeMap.
logger
module-attribute
logger = getLogger(__name__)
MAX_DIFF_CONTENT_LENGTH
module-attribute
MAX_DIFF_CONTENT_LENGTH = 100000
MAX_DIFF_LINES
module-attribute
MAX_DIFF_LINES = 1000
SMALL_SECTION_SIZE
module-attribute
SMALL_SECTION_SIZE = 50
COMPLEX_SECTION_SIZE
module-attribute
COMPLEX_SECTION_SIZE = 100
DiffSplitter
Splits Git diffs into logical chunks.
Source code in src/codemap/git/diff_splitter/splitter.py
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
__init__
__init__(
repo_root: Path,
similarity_threshold: float = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["similarity_threshold"],
directory_similarity_threshold: float = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["directory_similarity_threshold"],
min_chunks_for_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["min_chunks_for_consolidation"],
max_chunks_before_consolidation: int = DEFAULT_CONFIG[
"commit"
]["diff_splitter"]["max_chunks_before_consolidation"],
max_file_size_for_llm: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_file_size_for_llm"],
max_log_diff_size: int = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["max_log_diff_size"],
model_name: str = DEFAULT_CONFIG["commit"][
"diff_splitter"
]["model_name"],
) -> None
Initialize the diff splitter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_root
|
Path
|
Root directory of the Git repository |
required |
similarity_threshold
|
float
|
Threshold for grouping by content similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['similarity_threshold']
|
directory_similarity_threshold
|
float
|
Threshold for directory similarity. |
DEFAULT_CONFIG['commit']['diff_splitter']['directory_similarity_threshold']
|
min_chunks_for_consolidation
|
int
|
Min chunks to trigger consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['min_chunks_for_consolidation']
|
max_chunks_before_consolidation
|
int
|
Max chunks allowed before forced consolidation. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_chunks_before_consolidation']
|
max_file_size_for_llm
|
int
|
Max file size (bytes) to process for LLM context.
Defaults to the value from DEFAULT_CONFIG['commit']['diff_splitter']['max_file_size_for_llm']. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_file_size_for_llm']
|
max_log_diff_size
|
int
|
Max diff size (bytes) to log in debug mode.
Defaults to the value from DEFAULT_CONFIG['commit']['diff_splitter']['max_log_diff_size']. |
DEFAULT_CONFIG['commit']['diff_splitter']['max_log_diff_size']
|
model_name
|
str
|
Name of the sentence-transformer model to use.
Defaults to the value from DEFAULT_CONFIG['commit']['diff_splitter']['model_name']. |
DEFAULT_CONFIG['commit']['diff_splitter']['model_name']
|
Source code in src/codemap/git/diff_splitter/splitter.py
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | |
repo_root
instance-attribute
repo_root = repo_root
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
directory_similarity_threshold
instance-attribute
directory_similarity_threshold = (
directory_similarity_threshold
)
min_chunks_for_consolidation
instance-attribute
min_chunks_for_consolidation = min_chunks_for_consolidation
max_chunks_before_consolidation
instance-attribute
max_chunks_before_consolidation = (
max_chunks_before_consolidation
)
max_file_size_for_llm
instance-attribute
max_file_size_for_llm = max_file_size_for_llm
max_log_diff_size
instance-attribute
max_log_diff_size = max_log_diff_size
model_name
instance-attribute
model_name = model_name
are_sentence_transformers_available
classmethod
are_sentence_transformers_available() -> bool
Check if sentence transformers are available.
Returns:
| Type | Description |
|---|---|
bool
|
True if sentence transformers are available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
120 121 122 123 124 125 126 127 128 129 | |
is_model_available
classmethod
is_model_available() -> bool
Check if embedding model is available.
Returns:
| Type | Description |
|---|---|
bool
|
True if embedding model is available, False otherwise |
Source code in src/codemap/git/diff_splitter/splitter.py
131 132 133 134 135 136 137 138 139 140 | |
set_model_available
classmethod
set_model_available(value: bool) -> None
Set model availability flag.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
value
|
bool
|
Boolean indicating if model is available |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
142 143 144 145 146 147 148 149 150 151 | |
get_embedding_model
classmethod
get_embedding_model() -> EmbeddingModel | None
Get the embedding model.
Returns:
| Type | Description |
|---|---|
EmbeddingModel | None
|
The embedding model or None if not available |
Source code in src/codemap/git/diff_splitter/splitter.py
153 154 155 156 157 158 159 160 161 162 | |
set_embedding_model
classmethod
set_embedding_model(model: EmbeddingModel) -> None
Set the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
model
|
EmbeddingModel
|
The embedding model to set |
required |
Source code in src/codemap/git/diff_splitter/splitter.py
164 165 166 167 168 169 170 171 172 173 | |
split_diff
Split a diff into logical chunks using semantic splitting.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
tuple[list[DiffChunk], list[str]]
|
Tuple of (List of DiffChunk objects based on semantic analysis, List of filtered large files) |
Raises:
| Type | Description |
|---|---|
ValueError
|
If semantic splitting is not available or fails |
Source code in src/codemap/git/diff_splitter/splitter.py
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | |
encode_chunks
encode_chunks(chunks: list[str]) -> dict[str, ndarray]
Encode a list of text chunks using the embedding model.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[str]
|
List of text chunks to encode |
required |
Returns:
| Type | Description |
|---|---|
dict[str, ndarray]
|
Dictionary with embeddings array |
Source code in src/codemap/git/diff_splitter/splitter.py
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | |
constants
Constants for diff splitting functionality.
MIN_NAME_LENGTH_FOR_SIMILARITY
module-attribute
MIN_NAME_LENGTH_FOR_SIMILARITY: Final = 3
EPSILON
module-attribute
EPSILON = 1e-10
MAX_FILES_PER_GROUP
module-attribute
MAX_FILES_PER_GROUP: Final = 10
strategies
Strategies for splitting git diffs into logical chunks.
logger
module-attribute
logger = getLogger(__name__)
EXPECTED_TUPLE_SIZE
module-attribute
EXPECTED_TUPLE_SIZE = 2
EmbeddingModel
Bases: Protocol
Protocol for embedding models.
Source code in src/codemap/git/diff_splitter/strategies.py
37 38 39 40 41 42 | |
encode
encode(texts: Sequence[str], **kwargs: Any) -> ndarray
Encode texts into embeddings.
Source code in src/codemap/git/diff_splitter/strategies.py
40 41 42 | |
BaseSplitStrategy
Base class for diff splitting strategies.
Source code in src/codemap/git/diff_splitter/strategies.py
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | |
__init__
__init__(
embedding_model: EmbeddingModel | None = None,
) -> None
Initialize with optional embedding model.
Source code in src/codemap/git/diff_splitter/strategies.py
48 49 50 51 52 53 | |
split
Split the diff into chunks.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects |
Source code in src/codemap/git/diff_splitter/strategies.py
55 56 57 58 59 60 61 62 63 64 65 66 67 | |
FileSplitStrategy
Bases: BaseSplitStrategy
Strategy to split diffs by file.
Source code in src/codemap/git/diff_splitter/strategies.py
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | |
split
Split a diff into chunks by file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects, one per file |
Source code in src/codemap/git/diff_splitter/strategies.py
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | |
SemanticSplitStrategy
Bases: BaseSplitStrategy
Strategy to split diffs semantically.
Source code in src/codemap/git/diff_splitter/strategies.py
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 
1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 | |
__init__
__init__(
embedding_model: EmbeddingModel | None = None,
code_extensions: set[str] | None = None,
related_file_patterns: list[tuple[Pattern, Pattern]]
| None = None,
similarity_threshold: float = 0.4,
directory_similarity_threshold: float = 0.3,
min_chunks_for_consolidation: int = 2,
max_chunks_before_consolidation: int = 20,
max_file_size_for_llm: int | None = None,
) -> None
Initialize the SemanticSplitStrategy.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
embedding_model
|
EmbeddingModel | None
|
Optional embedding model instance |
None
|
code_extensions
|
set[str] | None
|
Optional set of code file extensions. Defaults to config. |
None
|
related_file_patterns
|
list[tuple[Pattern, Pattern]] | None
|
Optional list of related file patterns |
None
|
similarity_threshold
|
float
|
Threshold for grouping by content similarity. |
0.4
|
directory_similarity_threshold
|
float
|
Threshold for directory similarity. |
0.3
|
min_chunks_for_consolidation
|
int
|
Min chunks to trigger consolidation. |
2
|
max_chunks_before_consolidation
|
int
|
Max chunks allowed before forced consolidation. |
20
|
max_file_size_for_llm
|
int | None
|
Max file size for LLM processing. |
None
|
Source code in src/codemap/git/diff_splitter/strategies.py
131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 | |
similarity_threshold
instance-attribute
similarity_threshold = similarity_threshold
directory_similarity_threshold
instance-attribute
directory_similarity_threshold = (
directory_similarity_threshold
)
min_chunks_for_consolidation
instance-attribute
min_chunks_for_consolidation = min_chunks_for_consolidation
max_chunks_before_consolidation
instance-attribute
max_chunks_before_consolidation = (
max_chunks_before_consolidation
)
max_file_size_for_llm
instance-attribute
max_file_size_for_llm = (
max_file_size_for_llm
if max_file_size_for_llm is not None
else DEFAULT_CONFIG["commit"]["diff_splitter"][
"max_file_size_for_llm"
]
)
code_extensions
instance-attribute
code_extensions = (
code_extensions
if code_extensions is not None
else set(
DEFAULT_CONFIG["commit"]["diff_splitter"][
"default_code_extensions"
]
)
)
related_file_patterns
instance-attribute
related_file_patterns = (
related_file_patterns
or _initialize_related_file_patterns()
)
split
Split a diff into chunks based on semantic relationships.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
diff
|
GitDiff
|
GitDiff object to split |
required |
Returns:
| Type | Description |
|---|---|
list[DiffChunk]
|
List of DiffChunk objects based on semantic analysis |
Source code in src/codemap/git/diff_splitter/strategies.py
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | |
commit_generator
Commit message generation package for CodeMap.
This package provides modules for generating commit messages using LLMs.
DiffChunk
dataclass
Represents a logical chunk of changes.
Source code in src/codemap/git/diff_splitter/schemas.py
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
files
instance-attribute
files: list[str]
content
instance-attribute
content: str
description
class-attribute
instance-attribute
description: str | None = None
is_llm_generated
class-attribute
instance-attribute
is_llm_generated: bool = False
filtered_files
class-attribute
instance-attribute
filtered_files: list[str] | None = None
__post_init__
__post_init__() -> None
Initialize default values.
Source code in src/codemap/git/diff_splitter/schemas.py
17 18 19 20 | |
__hash__
__hash__() -> int
Make DiffChunk hashable by using the object's id.
Returns:
| Type | Description |
|---|---|
int
|
Hash value based on the object's id |
Source code in src/codemap/git/diff_splitter/schemas.py
22 23 24 25 26 27 28 29 30 | |
__eq__
__eq__(other: object) -> bool
Compare DiffChunk objects for equality.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
other
|
object
|
Another object to compare with |
required |
Returns:
| Type | Description |
|---|---|
bool
|
True if the objects are the same instance, False otherwise |
Source code in src/codemap/git/diff_splitter/schemas.py
32 33 34 35 36 37 38 39 40 41 42 43 44 45 | |
__init__
__init__(
files: list[str],
content: str,
description: str | None = None,
is_llm_generated: bool = False,
filtered_files: list[str] | None = None,
) -> None
CommitMessageGenerator
Generates commit messages using LLMs.
Source code in src/codemap/git/commit_generator/generator.py
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 | |
__init__
__init__(
repo_root: Path,
llm_client: LLMClient,
prompt_template: str,
config_loader: ConfigLoader,
) -> None
Initialize the commit message generator.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
repo_root
|
Path
|
Root directory of the Git repository |
required |
llm_client
|
LLMClient
|
LLMClient instance to use |
required |
prompt_template
|
str
|
Custom prompt template to use |
required |
config_loader
|
ConfigLoader
|
ConfigLoader instance to use for configuration |
required |
Source code in src/codemap/git/commit_generator/generator.py
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | |
repo_root
instance-attribute
repo_root = repo_root
prompt_template
instance-attribute
prompt_template = prompt_template
client
instance-attribute
client = llm_client
max_tokens
instance-attribute
max_tokens = get('max_context_tokens', 4000)
use_lod_context
instance-attribute
use_lod_context = get('use_lod_context', True)
extract_file_info
extract_file_info(chunk: DiffChunk) -> dict[str, Any]
Extract file information from the diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
Diff chunk object to extract information from |
required |
Returns:
| Type | Description |
|---|---|
dict[str, Any]
|
Dictionary with information about files |
Source code in src/codemap/git/commit_generator/generator.py
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | |
get_commit_convention
get_commit_convention() -> dict[str, Any]
Get commit convention settings from config.
Source code in src/codemap/git/commit_generator/generator.py
102 103 104 105 | |
format_json_to_commit_message
format_json_to_commit_message(content: str) -> str
Format a JSON string as a conventional commit message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
content
|
str
|
JSON content string from LLM response |
required |
Returns:
| Type | Description |
|---|---|
str
|
Formatted commit message string |
Source code in src/codemap/git/commit_generator/generator.py
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 | |
fallback_generation
fallback_generation(chunk: DiffChunk) -> str
Generate a fallback commit message without LLM.
This is used when LLM-based generation fails or is disabled.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
Diff chunk object to generate message for |
required |
Returns:
| Type | Description |
|---|---|
str
|
Generated commit message |
Source code in src/codemap/git/commit_generator/generator.py
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | |
generate_message
generate_message(chunk: DiffChunk) -> tuple[str, bool]
Generate a commit message for a diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
Diff chunk to generate message for |
required |
Returns:
| Type | Description |
|---|---|
tuple[str, bool]
|
Generated message and success flag |
Source code in src/codemap/git/commit_generator/generator.py
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 | |
generate_message_with_linting
generate_message_with_linting(
chunk: DiffChunk,
retry_count: int = 1,
max_retries: int = 3,
) -> tuple[str, bool, bool, list[str]]
Generate a commit message with linting verification.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunk
|
DiffChunk
|
The DiffChunk to generate a message for |
required |
retry_count
|
int
|
Current retry count (default: 1) |
1
|
max_retries
|
int
|
Maximum number of retries for linting (default: 3) |
3
|
Returns:
| Type | Description |
|---|---|
tuple[str, bool, bool, list[str]]
|
Tuple of (message, used_llm, passed_linting, lint_messages) |
Source code in src/codemap/git/commit_generator/generator.py
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 | |
get_config_loader
get_config_loader() -> ConfigLoader
Get the ConfigLoader instance used by this generator.
Returns:
| Type | Description |
|---|---|
ConfigLoader
|
ConfigLoader instance |
Source code in src/codemap/git/commit_generator/generator.py
668 669 670 671 672 673 674 675 676 | |
DEFAULT_PROMPT_TEMPLATE
module-attribute
DEFAULT_PROMPT_TEMPLATE = '\nYou are an AI assistant generating Conventional Commit 1.0.0 messages from Git diffs.\n\n**Format:**\n```\n<type>[optional scope]: <description>\n\n[optional body]\n\n[optional footer(s)]\n```\n\n**Instructions & Rules:**\n\n1. **Type:** REQUIRED. Must be lowercase and one of: {convention[types]}.\n * `feat`: New feature (MINOR SemVer).\n * `fix`: Bug fix (PATCH SemVer).\n * Other types (`build`, `chore`, `ci`, `docs`, `style`, `refactor`, `perf`, `test`, etc.) are allowed.\n2. **Scope:** OPTIONAL. Lowercase noun(s) in parentheses describing the code section (e.g., `(parser)`).\n * Keep short (1-2 words).\n3. **Description:** REQUIRED. Concise, imperative, present tense summary of *what* changed and *why* based on the diff.\n * Must follow the colon and space.\n * Must be >= 10 characters.\n * Must NOT end with a period.\n * The entire header line (`<type>[scope]: <description>`) must be <= {convention[max_length]} characters.\n4. **Body:** OPTIONAL. Explain *why* and *how*. Start one blank line after the description.\n\t*\tUse the body only if extra context is needed to understand the changes.\n\t*\tDo not use the body to add unrelated information.\n\t*\tDo not use the body to explain *what* was changed.\n\t*\tTry to keep the body concise and to the point.\n5. **Footer(s):** OPTIONAL. Format `Token: value` or `Token # value`.\n * Start one blank line after the body.\n * Use `-` for spaces in tokens (e.g., `Reviewed-by`).\n6. **BREAKING CHANGE:** Indicate with `!` before the colon in the header (e.g., `feat(api)!: ...`)\n * OR with a `BREAKING CHANGE: <description>` footer (MUST be uppercase).\n * Correlates with MAJOR SemVer.\n * If `!` is used, the description explains the break.\n7. 
**Special Case - Binary Files:**\n * For binary file changes, use `chore` type with a scope indicating the file type (e.g., `(assets)`, `(images)`, `(builds)`)\n * Be specific about what changed (e.g., "update image assets", "add new icon files", "replace binary database")\n * If the diff content is empty or shows binary file changes, focus on the filenames to determine the purpose\n\n**Input:**\n\n* File notes: {files}\n* Git diff: {diff}\n\n**Output Requirements:**\n\n* Respond with ONLY the raw commit message string.\n* NO extra text, explanations, or markdown formatting (like ```).\n* STRICTLY OMIT footers: `Related Issue #`, `Closes #`, `REVIEWED-BY`, `TRACKING #`, `APPROVED`.\n\n**(IMPORTANT) Following JSON Schema must be followed for Output:**\n{schema}\n\n---\nPlease return the commit message in a valid json format. Analyze the following diff and generate the commit message:\n\n{diff}\n'
prepare_prompt
prepare_prompt(
template: str,
diff_content: str,
file_info: dict[str, Any],
convention: dict[str, Any],
extra_context: dict[str, Any] | None = None,
) -> str
Prepare the prompt for the LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
template
|
str
|
Prompt template to use |
required |
diff_content
|
str
|
Diff content to include |
required |
file_info
|
dict[str, Any]
|
Information about files in the diff |
required |
convention
|
dict[str, Any]
|
Commit convention settings |
required |
extra_context
|
dict[str, Any] | None
|
Optional additional context values for the template |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted prompt |
Source code in src/codemap/git/commit_generator/prompts.py
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | |
COMMIT_MESSAGE_SCHEMA
module-attribute
COMMIT_MESSAGE_SCHEMA = {
"type": "object",
"properties": {
"type": {
"type": "string",
"description": "The type of change (e.g., feat, fix, docs, style, refactor, perf, test, chore)",
},
"scope": {
"type": ["string", "null"],
"description": "The scope of the change (e.g., component affected)",
},
"description": {
"type": "string",
"description": "A short, imperative-tense description of the change",
},
"body": {
"type": ["string", "null"],
"description": "A longer description of the changes, explaining why and how",
},
"breaking": {
"type": "boolean",
"description": "Whether this is a breaking change",
"default": False,
},
"footers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"token": {
"type": "string",
"description": "Footer token (e.g., 'BREAKING CHANGE', 'Fixes', 'Refs')",
},
"value": {
"type": "string",
"description": "Footer value",
},
},
"required": ["token", "value"],
},
"default": [],
},
},
"required": ["type", "description"],
}
CommitMessageSchema
Bases: TypedDict
TypedDict representing the structured commit message output.
Source code in src/codemap/git/commit_generator/schemas.py
8 9 10 11 12 13 14 15 16 | |
type
instance-attribute
type: str
scope
instance-attribute
scope: str | None
description
instance-attribute
description: str
body
instance-attribute
body: str | None
breaking
instance-attribute
breaking: bool
footers
instance-attribute
footers: list[dict[str, str]]
clean_message_for_linting
clean_message_for_linting(message: str) -> str
Clean a commit message for linting.
Removes extra newlines, trims whitespace, etc.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to clean |
required |
Returns:
| Type | Description |
|---|---|
str
|
The cleaned commit message |
Source code in src/codemap/git/commit_generator/utils.py
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | |
lint_commit_message
lint_commit_message(
message: str,
repo_root: Path | None = None,
config_loader: ConfigLoader | None = None,
) -> tuple[bool, str | None]
Lint a commit message.
Checks whether the message adheres to the Conventional Commits format, using the internal CommitLinter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to lint |
required |
repo_root
|
Path | None
|
Repository root path |
None
|
config_loader
|
ConfigLoader | None
|
Configuration loader instance |
None
|
Returns:
| Type | Description |
|---|---|
tuple[bool, str | None]
|
Tuple of (is_valid, error_message) |
Source code in src/codemap/git/commit_generator/utils.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | |
schemas
Schemas and data structures for commit message generation.
CommitMessageSchema
Bases: TypedDict
TypedDict representing the structured commit message output.
Source code in src/codemap/git/commit_generator/schemas.py
8 9 10 11 12 13 14 15 16 | |
type
instance-attribute
type: str
scope
instance-attribute
scope: str | None
description
instance-attribute
description: str
body
instance-attribute
body: str | None
breaking
instance-attribute
breaking: bool
footers
instance-attribute
footers: list[dict[str, str]]
COMMIT_MESSAGE_SCHEMA
module-attribute
COMMIT_MESSAGE_SCHEMA = {
"type": "object",
"properties": {
"type": {
"type": "string",
"description": "The type of change (e.g., feat, fix, docs, style, refactor, perf, test, chore)",
},
"scope": {
"type": ["string", "null"],
"description": "The scope of the change (e.g., component affected)",
},
"description": {
"type": "string",
"description": "A short, imperative-tense description of the change",
},
"body": {
"type": ["string", "null"],
"description": "A longer description of the changes, explaining why and how",
},
"breaking": {
"type": "boolean",
"description": "Whether this is a breaking change",
"default": False,
},
"footers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"token": {
"type": "string",
"description": "Footer token (e.g., 'BREAKING CHANGE', 'Fixes', 'Refs')",
},
"value": {
"type": "string",
"description": "Footer value",
},
},
"required": ["token", "value"],
},
"default": [],
},
},
"required": ["type", "description"],
}
prompts
Prompt templates for commit message generation.
DEFAULT_PROMPT_TEMPLATE
module-attribute
DEFAULT_PROMPT_TEMPLATE = '\nYou are an AI assistant generating Conventional Commit 1.0.0 messages from Git diffs.\n\n**Format:**\n```\n<type>[optional scope]: <description>\n\n[optional body]\n\n[optional footer(s)]\n```\n\n**Instructions & Rules:**\n\n1. **Type:** REQUIRED. Must be lowercase and one of: {convention[types]}.\n * `feat`: New feature (MINOR SemVer).\n * `fix`: Bug fix (PATCH SemVer).\n * Other types (`build`, `chore`, `ci`, `docs`, `style`, `refactor`, `perf`, `test`, etc.) are allowed.\n2. **Scope:** OPTIONAL. Lowercase noun(s) in parentheses describing the code section (e.g., `(parser)`).\n * Keep short (1-2 words).\n3. **Description:** REQUIRED. Concise, imperative, present tense summary of *what* changed and *why* based on the diff.\n * Must follow the colon and space.\n * Must be >= 10 characters.\n * Must NOT end with a period.\n * The entire header line (`<type>[scope]: <description>`) must be <= {convention[max_length]} characters.\n4. **Body:** OPTIONAL. Explain *why* and *how*. Start one blank line after the description.\n\t*\tUse the body only if extra context is needed to understand the changes.\n\t*\tDo not use the body to add unrelated information.\n\t*\tDo not use the body to explain *what* was changed.\n\t*\tTry to keep the body concise and to the point.\n5. **Footer(s):** OPTIONAL. Format `Token: value` or `Token # value`.\n * Start one blank line after the body.\n * Use `-` for spaces in tokens (e.g., `Reviewed-by`).\n6. **BREAKING CHANGE:** Indicate with `!` before the colon in the header (e.g., `feat(api)!: ...`)\n * OR with a `BREAKING CHANGE: <description>` footer (MUST be uppercase).\n * Correlates with MAJOR SemVer.\n * If `!` is used, the description explains the break.\n7. 
**Special Case - Binary Files:**\n * For binary file changes, use `chore` type with a scope indicating the file type (e.g., `(assets)`, `(images)`, `(builds)`)\n * Be specific about what changed (e.g., "update image assets", "add new icon files", "replace binary database")\n * If the diff content is empty or shows binary file changes, focus on the filenames to determine the purpose\n\n**Input:**\n\n* File notes: {files}\n* Git diff: {diff}\n\n**Output Requirements:**\n\n* Respond with ONLY the raw commit message string.\n* NO extra text, explanations, or markdown formatting (like ```).\n* STRICTLY OMIT footers: `Related Issue #`, `Closes #`, `REVIEWED-BY`, `TRACKING #`, `APPROVED`.\n\n**(IMPORTANT) Following JSON Schema must be followed for Output:**\n{schema}\n\n---\nPlease return the commit message in a valid json format. Analyze the following diff and generate the commit message:\n\n{diff}\n'
get_lint_prompt_template
get_lint_prompt_template() -> str
Get the prompt template for lint feedback.
Returns:
| Type | Description |
|---|---|
str
|
The prompt template with lint feedback placeholders |
Source code in src/codemap/git/commit_generator/prompts.py
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | |
prepare_prompt
prepare_prompt(
template: str,
diff_content: str,
file_info: dict[str, Any],
convention: dict[str, Any],
extra_context: dict[str, Any] | None = None,
) -> str
Prepare the prompt for the LLM.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
template
|
str
|
Prompt template to use |
required |
diff_content
|
str
|
Diff content to include |
required |
file_info
|
dict[str, Any]
|
Information about files in the diff |
required |
convention
|
dict[str, Any]
|
Commit convention settings |
required |
extra_context
|
dict[str, Any] | None
|
Optional additional context values for the template |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted prompt |
Source code in src/codemap/git/commit_generator/prompts.py
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | |
prepare_lint_prompt
prepare_lint_prompt(
template: str,
file_info: dict[str, Any],
convention: dict[str, Any],
lint_messages: list[str],
original_message: str | None = None,
) -> str
Prepare a prompt with lint feedback for regeneration.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
template
|
str
|
Prompt template to use |
required |
file_info
|
dict[str, Any]
|
Information about files in the diff |
required |
convention
|
dict[str, Any]
|
Commit convention settings |
required |
lint_messages
|
list[str]
|
List of linting error messages |
required |
original_message
|
str | None
|
The original failed commit message |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Enhanced prompt with linting feedback |
Source code in src/codemap/git/commit_generator/prompts.py
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 | |
utils
Utility functions for commit message generation.
logger
module-attribute
logger = getLogger(__name__)
clean_message_for_linting
clean_message_for_linting(message: str) -> str
Clean a commit message for linting.
Removes extra newlines, trims whitespace, etc.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to clean |
required |
Returns:
| Type | Description |
|---|---|
str
|
The cleaned commit message |
Source code in src/codemap/git/commit_generator/utils.py
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | |
lint_commit_message
lint_commit_message(
message: str,
repo_root: Path | None = None,
config_loader: ConfigLoader | None = None,
) -> tuple[bool, str | None]
Lint a commit message.
Checks whether the message adheres to the Conventional Commits format, using the internal CommitLinter.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
message
|
str
|
The commit message to lint |
required |
repo_root
|
Path | None
|
Repository root path |
None
|
config_loader
|
ConfigLoader | None
|
Configuration loader instance |
None
|
Returns:
| Type | Description |
|---|---|
tuple[bool, str | None]
|
Tuple of (is_valid, error_message) |
Source code in src/codemap/git/commit_generator/utils.py
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | |
save_working_directory_state
save_working_directory_state(
files: list[str], output_file: str
) -> bool
Save the current state of specified files to a patch file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
files
|
list[str]
|
List of file paths |
required |
output_file
|
str
|
Path to output patch file |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
Whether the operation was successful |
Source code in src/codemap/git/commit_generator/utils.py
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | |
restore_working_directory_state
restore_working_directory_state(patch_file: str) -> bool
Restore the working directory state from a patch file.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
patch_file
|
str
|
Path to patch file |
required |
Returns:
| Name | Type | Description |
|---|---|---|
bool |
bool
|
Whether the operation was successful |
Source code in src/codemap/git/commit_generator/utils.py
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | |
format_commit_json
format_commit_json(
content: str, config_loader: ConfigLoader | None = None
) -> str
Format a JSON string as a conventional commit message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
content
|
str
|
JSON content string from LLM response |
required |
config_loader
|
ConfigLoader | None
|
Optional ConfigLoader for commit conventions |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted commit message string |
Source code in src/codemap/git/commit_generator/utils.py
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 | |
prepare_prompt
prepare_prompt(
template: str,
diff_content: str,
file_info: dict[str, Any],
convention: dict[str, Any],
extra_context: dict[str, Any] | None = None,
) -> str
Prepare a prompt for LLM commit message generation.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
template
|
str
|
The prompt template string |
required |
diff_content
|
str
|
The diff content to include in the prompt |
required |
file_info
|
dict[str, Any]
|
Dictionary of file information |
required |
convention
|
dict[str, Any]
|
Commit convention configuration |
required |
extra_context
|
dict[str, Any] | None
|
Additional context variables for the template |
None
|
Returns:
| Type | Description |
|---|---|
str
|
Formatted prompt string |
Source code in src/codemap/git/commit_generator/utils.py
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | |
command
Main commit command implementation for CodeMap.
logger
module-attribute
logger = getLogger(__name__)
MAX_FILES_BEFORE_BATCHING
module-attribute
MAX_FILES_BEFORE_BATCHING = 10
MAX_FILE_CONTENT_LINES
module-attribute
MAX_FILE_CONTENT_LINES = 300
MAX_TOTAL_CONTENT_LINES
module-attribute
MAX_TOTAL_CONTENT_LINES = 1000
MIN_PORCELAIN_LINE_LENGTH
module-attribute
MIN_PORCELAIN_LINE_LENGTH = 3
ExitCommandError
Bases: Exception
Exception to signal an exit command.
Source code in src/codemap/git/commit_generator/command.py
53 54 | |
CommitCommand
Handles the commit command workflow.
Source code in src/codemap/git/commit_generator/command.py
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 
567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 | |
__init__
__init__(
path: Path | None = None,
model: str = "gpt-4o-mini",
bypass_hooks: bool = False,
) -> None
Initialize the commit command.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path
|
Path | None
|
Optional path to start from |
None
|
model
|
str
|
LLM model to use for commit message generation |
'gpt-4o-mini'
|
bypass_hooks
|
bool
|
Whether to bypass git hooks with --no-verify |
False
|
Source code in src/codemap/git/commit_generator/command.py
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | |
target_files
instance-attribute
target_files = []
message_generator
instance-attribute
message_generator = CommitMessageGenerator(
repo_root=repo_root,
llm_client=llm_client,
prompt_template=DEFAULT_PROMPT_TEMPLATE,
config_loader=config_loader,
)
error_state
instance-attribute
error_state = None
bypass_hooks
instance-attribute
bypass_hooks = bypass_hooks
process_all_chunks
process_all_chunks(
chunks: list[DiffChunk],
grand_total: int,
interactive: bool = True,
) -> bool
Process all generated chunks.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
chunks
|
list[DiffChunk]
|
List of DiffChunk objects to process |
required |
grand_total
|
int
|
Total number of chunks initially generated |
required |
interactive
|
bool
|
Whether to run in interactive mode |
True
|
Returns:
| Type | Description |
|---|---|
bool
|
True if all chunks were processed successfully, False otherwise |
Source code in src/codemap/git/commit_generator/command.py
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 | |
run
run(interactive: bool = True) -> bool
Run the commit command workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
interactive
|
bool
|
Whether to run in interactive mode. Defaults to True. |
True
|
Returns:
| Type | Description |
|---|---|
bool
|
True if the process completed (even if aborted), False on unexpected error. |
Source code in src/codemap/git/commit_generator/command.py
429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 | |
SemanticCommitCommand
Bases: CommitCommand
Handles the semantic commit command workflow.
Source code in src/codemap/git/commit_generator/command.py
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 
1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 | |
__init__
__init__(
path: Path | None = None,
model: str = "gpt-4o-mini",
bypass_hooks: bool = False,
embedding_model: str = "all-MiniLM-L6-v2",
clustering_method: str = "agglomerative",
similarity_threshold: float = 0.6,
) -> None
Initialize the semantic commit command.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
path
|
Path | None
|
Optional path to start from |
None
|
model
|
str
|
LLM model to use for commit message generation |
'gpt-4o-mini'
|
bypass_hooks
|
bool
|
Whether to bypass git hooks with --no-verify |
False
|
embedding_model
|
str
|
Model to use for generating embeddings |
'all-MiniLM-L6-v2'
|
clustering_method
|
str
|
Method to use for clustering ("agglomerative" or "dbscan") |
'agglomerative'
|
similarity_threshold
|
float
|
Threshold for group similarity to trigger merging |
0.6
|
Source code in src/codemap/git/commit_generator/command.py
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 | |
resolver
instance-attribute
resolver = FileIntegrityResolver(
similarity_threshold=similarity_threshold
)
committed_files
instance-attribute
committed_files: set[str] = set()
is_pathspec_mode
instance-attribute
is_pathspec_mode = False
all_repo_files
instance-attribute
all_repo_files: set[str] = set()
target_files
instance-attribute
target_files: list[str] = []
run
run(
interactive: bool = True,
pathspecs: list[str] | None = None,
) -> bool
Run the semantic commit command workflow.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
interactive
|
bool
|
Whether to run in interactive mode |
True
|
pathspecs
|
list[str] | None
|
Optional list of path specifications |
None
|
Returns:
| Name | Type | Description |
|---|---|---|
| `bool` | `bool` | Whether the process completed successfully |
Source code in src/codemap/git/commit_generator/command.py
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 | |
generator
Generator module for commit messages.
logger
module-attribute
logger = getLogger(__name__)
MAX_DEBUG_CONTENT_LENGTH
module-attribute
MAX_DEBUG_CONTENT_LENGTH = 100
EXPECTED_PARTS_COUNT
module-attribute
EXPECTED_PARTS_COUNT = 2
CommitMessageGenerator
Generates commit messages using LLMs.
Source code in src/codemap/git/commit_generator/generator.py
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 
547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 | |
__init__
__init__(
repo_root: Path,
llm_client: LLMClient,
prompt_template: str,
config_loader: ConfigLoader,
) -> None
Initialize the commit message generator.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `repo_root` | `Path` | Root directory of the Git repository | required |
| `llm_client` | `LLMClient` | LLMClient instance to use | required |
| `prompt_template` | `str` | Custom prompt template to use | required |
| `config_loader` | `ConfigLoader` | ConfigLoader instance to use for configuration | required |
Source code in src/codemap/git/commit_generator/generator.py
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 | |
repo_root
instance-attribute
repo_root = repo_root
prompt_template
instance-attribute
prompt_template = prompt_template
client
instance-attribute
client = llm_client
max_tokens
instance-attribute
max_tokens = get('max_context_tokens', 4000)
use_lod_context
instance-attribute
use_lod_context = get('use_lod_context', True)
extract_file_info
extract_file_info(chunk: DiffChunk) -> dict[str, Any]
Extract file information from the diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `chunk` | `DiffChunk` | Diff chunk object to extract information from | required |
Returns:
| Type | Description |
|---|---|
| `dict[str, Any]` | Dictionary with information about files |
Source code in src/codemap/git/commit_generator/generator.py
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | |
get_commit_convention
get_commit_convention() -> dict[str, Any]
Get commit convention settings from config.
Source code in src/codemap/git/commit_generator/generator.py
102 103 104 105 | |
format_json_to_commit_message
format_json_to_commit_message(content: str) -> str
Format a JSON string as a conventional commit message.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `content` | `str` | JSON content string from LLM response | required |
Returns:
| Type | Description |
|---|---|
| `str` | Formatted commit message string |
Source code in src/codemap/git/commit_generator/generator.py
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 | |
fallback_generation
fallback_generation(chunk: DiffChunk) -> str
Generate a fallback commit message without LLM.
This is used when LLM-based generation fails or is disabled.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `chunk` | `DiffChunk` | Diff chunk object to generate message for | required |
Returns:
| Type | Description |
|---|---|
| `str` | Generated commit message |
Source code in src/codemap/git/commit_generator/generator.py
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | |
generate_message
generate_message(chunk: DiffChunk) -> tuple[str, bool]
Generate a commit message for a diff chunk.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `chunk` | `DiffChunk` | Diff chunk to generate message for | required |
Returns:
| Type | Description |
|---|---|
| `tuple[str, bool]` | Generated message and success flag |
Source code in src/codemap/git/commit_generator/generator.py
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 | |
generate_message_with_linting
generate_message_with_linting(
chunk: DiffChunk,
retry_count: int = 1,
max_retries: int = 3,
) -> tuple[str, bool, bool, list[str]]
Generate a commit message with linting verification.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `chunk` | `DiffChunk` | The DiffChunk to generate a message for | required |
| `retry_count` | `int` | Current retry count (default: 1) | `1` |
| `max_retries` | `int` | Maximum number of retries for linting (default: 3) | `3` |
Returns:
| Type | Description |
|---|---|
| `tuple[str, bool, bool, list[str]]` | Tuple of (message, used_llm, passed_linting, lint_messages) |
Source code in src/codemap/git/commit_generator/generator.py
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 | |
get_config_loader
get_config_loader() -> ConfigLoader
Get the ConfigLoader instance used by this generator.
Returns:
| Type | Description |
|---|---|
| `ConfigLoader` | ConfigLoader instance |
Source code in src/codemap/git/commit_generator/generator.py
668 669 670 671 672 673 674 675 676 | |